src/gallium/drivers/softpipe/sp_tex_sample.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 VMware, Inc.
   4  * All Rights Reserved.
   5  * Copyright 2008-2010 VMware, Inc.  All rights reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the
   9  * "Software"), to deal in the Software without restriction, including
  10  * without limitation the rights to use, copy, modify, merge, publish,
  11  * distribute, sub license, and/or sell copies of the Software, and to
  12  * permit persons to whom the Software is furnished to do so, subject to
  13  * the following conditions:
  14  *
  15  * The above copyright notice and this permission notice (including the
  16  * next paragraph) shall be included in all copies or substantial portions
  17  * of the Software.
  18  *
  19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  22  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26  *
  27  **************************************************************************/
  28
  29 /**
  30  * Texture sampling
  31  *
  32  * Authors:
  33  *   Brian Paul
  34  *   Keith Whitwell
  35  */
  36
  37 #include "pipe/p_context.h"
  38 #include "pipe/p_defines.h"
  39 #include "pipe/p_shader_tokens.h"
  40 #include "util/u_math.h"
  41 #include "util/u_format.h"
  42 #include "util/u_memory.h"
  43 #include "util/u_inlines.h"
  44 #include "sp_quad.h"   /* only for #define QUAD_* tokens */
  45 #include "sp_tex_sample.h"
  46 #include "sp_texture.h"
  47 #include "sp_tex_tile_cache.h"
  48
  49
  50 /** Set to one to help debug texture sampling */
  51 #define DEBUG_TEX 0
  52
  53
  54 /*
  55  * Return fractional part of 'f'.  Used for computing interpolation weights.
  56  * Need to be careful with negative values.
  57  * Note, if this function isn't perfect you'll sometimes see 1-pixel bands
  58  * of improperly weighted linear-filtered textures.
  59  * The tests/texwrap.c demo is a good test.
  60  */
  61 static INLINE float
  62 frac(float f)
  63 {
  64    return f - floorf(f);
  65 }
  66
  67
  68
  69 /**
  70  * Linear interpolation macro
  71  */
  72 static INLINE float
  73 lerp(float a, float v0, float v1)
  74 {
  75    return v0 + a * (v1 - v0);
  76 }
  77
  78
  79 /**
  80  * Do 2D/bilinear interpolation of float values.
  81  * v00, v10, v01 and v11 are typically four texture samples in a square/box.
  82  * a and b are the horizontal and vertical interpolants.
  83  * It's important that this function is inlined when compiled with
  84  * optimization!  If we find that's not true on some systems, convert
  85  * to a macro.
  86  */
  87 static INLINE float
  88 lerp_2d(float a, float b,
  89         float v00, float v10, float v01, float v11)
  90 {
  91    const float temp0 = lerp(a, v00, v10);
  92    const float temp1 = lerp(a, v01, v11);
  93    return lerp(b, temp0, temp1);
  94 }
  95
  96
  97 /**
  98  * As above, but 3D interpolation of 8 values.
  99  */
 100 static INLINE float
 101 lerp_3d(float a, float b, float c,
 102         float v000, float v100, float v010, float v110,
 103         float v001, float v101, float v011, float v111)
 104 {
 105    const float temp0 = lerp_2d(a, b, v000, v100, v010, v110);
 106    const float temp1 = lerp_2d(a, b, v001, v101, v011, v111);
 107    return lerp(c, temp0, temp1);
 108 }
 109
 110
 111
 112 /**
 113  * Compute coord % size for repeat wrap modes.
 114  * Note that if coord is negative, coord % size doesn't give the right
 115  * value.  To avoid that problem we add a large multiple of the size
 116  * (rather than using a conditional).
 117  */
 118 static INLINE int
 119 repeat(int coord, unsigned size)
 120 {
 121    return (coord + size * 1024) % size;
 122 }
 123
 124
/**
 * Apply a texture coord wrapping mode and return the integer texture index
 * for a single texcoord (S or T or P).  There is one wrap_nearest_*()
 * function for each PIPE_TEX_WRAP_x mode.
 * \param s  the incoming texcoord
 * \param size  the texture image size
 * \param icoord  returns the integer texcoord
 */
 133 static void
 134 wrap_nearest_repeat(float s, unsigned size, int *icoord)
 135 {
 136    /* s limited to [0,1) */
 137    /* i limited to [0,size-1] */
 138    int i = util_ifloor(s * size);
 139    *icoord = repeat(i, size);
 140 }
 141
 142
 143 static void
 144 wrap_nearest_clamp(float s, unsigned size, int *icoord)
 145 {
 146    /* s limited to [0,1] */
 147    /* i limited to [0,size-1] */
 148    if (s <= 0.0F)
 149       *icoord = 0;
 150    else if (s >= 1.0F)
 151       *icoord = size - 1;
 152    else
 153       *icoord = util_ifloor(s * size);
 154 }
 155
 156
 157 static void
 158 wrap_nearest_clamp_to_edge(float s, unsigned size, int *icoord)
 159 {
 160    /* s limited to [min,max] */
 161    /* i limited to [0, size-1] */
 162    const float min = 1.0F / (2.0F * size);
 163    const float max = 1.0F - min;
 164    if (s < min)
 165       *icoord = 0;
 166    else if (s > max)
 167       *icoord = size - 1;
 168    else
 169       *icoord = util_ifloor(s * size);
 170 }
 171
 172
 173 static void
 174 wrap_nearest_clamp_to_border(float s, unsigned size, int *icoord)
 175 {
 176    /* s limited to [min,max] */
 177    /* i limited to [-1, size] */
 178    const float min = -1.0F / (2.0F * size);
 179    const float max = 1.0F - min;
 180    if (s <= min)
 181       *icoord = -1;
 182    else if (s >= max)
 183       *icoord = size;
 184    else
 185       *icoord = util_ifloor(s * size);
 186 }
 187
 188
 189 static void
 190 wrap_nearest_mirror_repeat(float s, unsigned size, int *icoord)
 191 {
 192    const float min = 1.0F / (2.0F * size);
 193    const float max = 1.0F - min;
 194    const int flr = util_ifloor(s);
 195    float u = frac(s);
 196    if (flr & 1)
 197       u = 1.0F - u;
 198    if (u < min)
 199       *icoord = 0;
 200    else if (u > max)
 201       *icoord = size - 1;
 202    else
 203       *icoord = util_ifloor(u * size);
 204 }
 205
 206
 207 static void
 208 wrap_nearest_mirror_clamp(float s, unsigned size, int *icoord)
 209 {
 210    /* s limited to [0,1] */
 211    /* i limited to [0,size-1] */
 212    const float u = fabsf(s);
 213    if (u <= 0.0F)
 214       *icoord = 0;
 215    else if (u >= 1.0F)
 216       *icoord = size - 1;
 217    else
 218       *icoord = util_ifloor(u * size);
 219 }
 220
 221
 222 static void
 223 wrap_nearest_mirror_clamp_to_edge(float s, unsigned size, int *icoord)
 224 {
 225    /* s limited to [min,max] */
 226    /* i limited to [0, size-1] */
 227    const float min = 1.0F / (2.0F * size);
 228    const float max = 1.0F - min;
 229    const float u = fabsf(s);
 230    if (u < min)
 231       *icoord = 0;
 232    else if (u > max)
 233       *icoord = size - 1;
 234    else
 235       *icoord = util_ifloor(u * size);
 236 }
 237
 238
 239 static void
 240 wrap_nearest_mirror_clamp_to_border(float s, unsigned size, int *icoord)
 241 {
 242    /* s limited to [min,max] */
 243    /* i limited to [0, size-1] */
 244    const float min = -1.0F / (2.0F * size);
 245    const float max = 1.0F - min;
 246    const float u = fabsf(s);
 247    if (u < min)
 248       *icoord = -1;
 249    else if (u > max)
 250       *icoord = size;
 251    else
 252       *icoord = util_ifloor(u * size);
 253 }
 254
 255
/**
 * Used to compute texel locations for linear sampling.  There is one
 * wrap_linear_*() function for each PIPE_TEX_WRAP_x mode.
 * \param s  the texcoord
 * \param size  the texture image size
 * \param icoord0  returns first texture index
 * \param icoord1  returns second texture index (usually icoord0 + 1)
 * \param w  returns blend factor/weight between texture indices
 */
 266 static void
 267 wrap_linear_repeat(float s, unsigned size,
 268                    int *icoord0, int *icoord1, float *w)
 269 {
 270    float u = s * size - 0.5F;
 271    *icoord0 = repeat(util_ifloor(u), size);
 272    *icoord1 = repeat(*icoord0 + 1, size);
 273    *w = frac(u);
 274 }
 275
 276
 277 static void
 278 wrap_linear_clamp(float s, unsigned size,
 279                   int *icoord0, int *icoord1, float *w)
 280 {
 281    float u = CLAMP(s, 0.0F, 1.0F);
 282    u = u * size - 0.5f;
 283    *icoord0 = util_ifloor(u);
 284    *icoord1 = *icoord0 + 1;
 285    *w = frac(u);
 286 }
 287
 288
 289 static void
 290 wrap_linear_clamp_to_edge(float s, unsigned size,
 291                           int *icoord0, int *icoord1, float *w)
 292 {
 293    float u = CLAMP(s, 0.0F, 1.0F);
 294    u = u * size - 0.5f;
 295    *icoord0 = util_ifloor(u);
 296    *icoord1 = *icoord0 + 1;
 297    if (*icoord0 < 0)
 298       *icoord0 = 0;
 299    if (*icoord1 >= (int) size)
 300       *icoord1 = size - 1;
 301    *w = frac(u);
 302 }
 303
 304
 305 static void
 306 wrap_linear_clamp_to_border(float s, unsigned size,
 307                             int *icoord0, int *icoord1, float *w)
 308 {
 309    const float min = -1.0F / (2.0F * size);
 310    const float max = 1.0F - min;
 311    float u = CLAMP(s, min, max);
 312    u = u * size - 0.5f;
 313    *icoord0 = util_ifloor(u);
 314    *icoord1 = *icoord0 + 1;
 315    *w = frac(u);
 316 }
 317
 318
 319 static void
 320 wrap_linear_mirror_repeat(float s, unsigned size,
 321                           int *icoord0, int *icoord1, float *w)
 322 {
 323    const int flr = util_ifloor(s);
 324    float u = frac(s);
 325    if (flr & 1)
 326       u = 1.0F - u;
 327    u = u * size - 0.5F;
 328    *icoord0 = util_ifloor(u);
 329    *icoord1 = *icoord0 + 1;
 330    if (*icoord0 < 0)
 331       *icoord0 = 0;
 332    if (*icoord1 >= (int) size)
 333       *icoord1 = size - 1;
 334    *w = frac(u);
 335 }
 336
 337
 338 static void
 339 wrap_linear_mirror_clamp(float s, unsigned size,
 340                          int *icoord0, int *icoord1, float *w)
 341 {
 342    float u = fabsf(s);
 343    if (u >= 1.0F)
 344       u = (float) size;
 345    else
 346       u *= size;
 347    u -= 0.5F;
 348    *icoord0 = util_ifloor(u);
 349    *icoord1 = *icoord0 + 1;
 350    *w = frac(u);
 351 }
 352
 353
 354 static void
 355 wrap_linear_mirror_clamp_to_edge(float s, unsigned size,
 356                                  int *icoord0, int *icoord1, float *w)
 357 {
 358    float u = fabsf(s);
 359    if (u >= 1.0F)
 360       u = (float) size;
 361    else
 362       u *= size;
 363    u -= 0.5F;
 364    *icoord0 = util_ifloor(u);
 365    *icoord1 = *icoord0 + 1;
 366    if (*icoord0 < 0)
 367       *icoord0 = 0;
 368    if (*icoord1 >= (int) size)
 369       *icoord1 = size - 1;
 370    *w = frac(u);
 371 }
 372
 373
 374 static void
 375 wrap_linear_mirror_clamp_to_border(float s, unsigned size,
 376                                    int *icoord0, int *icoord1, float *w)
 377 {
 378    const float min = -1.0F / (2.0F * size);
 379    const float max = 1.0F - min;
 380    float u = fabsf(s);
 381    if (u <= min)
 382       u = min * size;
 383    else if (u >= max)
 384       u = max * size;
 385    else
 386       u *= size;
 387    u -= 0.5F;
 388    *icoord0 = util_ifloor(u);
 389    *icoord1 = *icoord0 + 1;
 390    *w = frac(u);
 391 }
 392
 393
 394 /**
 395  * PIPE_TEX_WRAP_CLAMP for nearest sampling, unnormalized coords.
 396  */
 397 static void
 398 wrap_nearest_unorm_clamp(float s, unsigned size, int *icoord)
 399 {
 400    int i = util_ifloor(s);
 401    *icoord = CLAMP(i, 0, (int) size-1);
 402 }
 403
 404
 405 /**
 406  * PIPE_TEX_WRAP_CLAMP_TO_BORDER for nearest sampling, unnormalized coords.
 407  */
 408 static void
 409 wrap_nearest_unorm_clamp_to_border(float s, unsigned size, int *icoord)
 410 {
 411    *icoord = util_ifloor( CLAMP(s, -0.5F, (float) size + 0.5F) );
 412 }
 413
 414
 415 /**
 416  * PIPE_TEX_WRAP_CLAMP_TO_EDGE for nearest sampling, unnormalized coords.
 417  */
 418 static void
 419 wrap_nearest_unorm_clamp_to_edge(float s, unsigned size, int *icoord)
 420 {
 421    *icoord = util_ifloor( CLAMP(s, 0.5F, (float) size - 0.5F) );
 422 }
 423
 424
 425 /**
 426  * PIPE_TEX_WRAP_CLAMP for linear sampling, unnormalized coords.
 427  */
 428 static void
 429 wrap_linear_unorm_clamp(float s, unsigned size,
 430                         int *icoord0, int *icoord1, float *w)
 431 {
 432    /* Not exactly what the spec says, but it matches NVIDIA output */
 433    float u = CLAMP(s - 0.5F, 0.0f, (float) size - 1.0f);
 434    *icoord0 = util_ifloor(u);
 435    *icoord1 = *icoord0 + 1;
 436    *w = frac(u);
 437 }
 438
 439
 440 /**
 441  * PIPE_TEX_WRAP_CLAMP_TO_BORDER for linear sampling, unnormalized coords.
 442  */
 443 static void
 444 wrap_linear_unorm_clamp_to_border(float s, unsigned size,
 445                                   int *icoord0, int *icoord1, float *w)
 446 {
 447    float u = CLAMP(s, -0.5F, (float) size + 0.5F);
 448    u -= 0.5F;
 449    *icoord0 = util_ifloor(u);
 450    *icoord1 = *icoord0 + 1;
 451    if (*icoord1 > (int) size - 1)
 452       *icoord1 = size - 1;
 453    *w = frac(u);
 454 }
 455
 456
 457 /**
 458  * PIPE_TEX_WRAP_CLAMP_TO_EDGE for linear sampling, unnormalized coords.
 459  */
 460 static void
 461 wrap_linear_unorm_clamp_to_edge(float s, unsigned size,
 462                                 int *icoord0, int *icoord1, float *w)
 463 {
 464    float u = CLAMP(s, +0.5F, (float) size - 0.5F);
 465    u -= 0.5F;
 466    *icoord0 = util_ifloor(u);
 467    *icoord1 = *icoord0 + 1;
 468    if (*icoord1 > (int) size - 1)
 469       *icoord1 = size - 1;
 470    *w = frac(u);
 471 }
 472
 473
 474 /**
 475  * Do coordinate to array index conversion.  For array textures.
 476  */
 477 static INLINE int
 478 coord_to_layer(float coord, unsigned first_layer, unsigned last_layer)
 479 {
 480    int c = util_ifloor(coord + 0.5F);
 481    return CLAMP(c, (int)first_layer, (int)last_layer);
 482 }
 483
 484
 485 /**
 486  * Examine the quad's texture coordinates to compute the partial
 487  * derivatives w.r.t X and Y, then compute lambda (level of detail).
 488  */
 489 static float
 490 compute_lambda_1d(const struct sp_sampler_view *sview,
 491                   const float s[TGSI_QUAD_SIZE],
 492                   const float t[TGSI_QUAD_SIZE],
 493                   const float p[TGSI_QUAD_SIZE])
 494 {
 495    const struct pipe_resource *texture = sview->base.texture;
 496    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
 497    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
 498    float rho = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
 499
 500    return util_fast_log2(rho);
 501 }
 502
 503
 504 static float
 505 compute_lambda_2d(const struct sp_sampler_view *sview,
 506                   const float s[TGSI_QUAD_SIZE],
 507                   const float t[TGSI_QUAD_SIZE],
 508                   const float p[TGSI_QUAD_SIZE])
 509 {
 510    const struct pipe_resource *texture = sview->base.texture;
 511    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
 512    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
 513    float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
 514    float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
 515    float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
 516    float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, sview->base.u.tex.first_level);
 517    float rho  = MAX2(maxx, maxy);
 518
 519    return util_fast_log2(rho);
 520 }
 521
 522
 523 static float
 524 compute_lambda_3d(const struct sp_sampler_view *sview,
 525                   const float s[TGSI_QUAD_SIZE],
 526                   const float t[TGSI_QUAD_SIZE],
 527                   const float p[TGSI_QUAD_SIZE])
 528 {
 529    const struct pipe_resource *texture = sview->base.texture;
 530    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
 531    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
 532    float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
 533    float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
 534    float dpdx = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]);
 535    float dpdy = fabsf(p[QUAD_TOP_LEFT]     - p[QUAD_BOTTOM_LEFT]);
 536    float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
 537    float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, sview->base.u.tex.first_level);
 538    float maxz = MAX2(dpdx, dpdy) * u_minify(texture->depth0, sview->base.u.tex.first_level);
 539    float rho;
 540
 541    rho = MAX2(maxx, maxy);
 542    rho = MAX2(rho, maxz);
 543
 544    return util_fast_log2(rho);
 545 }
 546
 547
 548 /**
 549  * Compute lambda for a vertex texture sampler.
 550  * Since there aren't derivatives to use, just return 0.
 551  */
 552 static float
 553 compute_lambda_vert(const struct sp_sampler_view *sview,
 554                     const float s[TGSI_QUAD_SIZE],
 555                     const float t[TGSI_QUAD_SIZE],
 556                     const float p[TGSI_QUAD_SIZE])
 557 {
 558    return 0.0f;
 559 }
 560
 561
 562
 563 /**
 564  * Get a texel from a texture, using the texture tile cache.
 565  *
 566  * \param addr  the template tex address containing cube, z, face info.
 567  * \param x  the x coord of texel within 2D image
 568  * \param y  the y coord of texel within 2D image
 569  * \param rgba  the quad to put the texel/color into
 570  *
 571  * XXX maybe move this into sp_tex_tile_cache.c and merge with the
 572  * sp_get_cached_tile_tex() function.
 573  */
 574
 575
 576
 577
 578 static INLINE const float *
 579 get_texel_2d_no_border(const struct sp_sampler_view *sp_sview,
 580                        union tex_tile_address addr, int x, int y)
 581 {
 582    const struct softpipe_tex_cached_tile *tile;
 583    addr.bits.x = x / TEX_TILE_SIZE;
 584    addr.bits.y = y / TEX_TILE_SIZE;
 585    y %= TEX_TILE_SIZE;
 586    x %= TEX_TILE_SIZE;
 587
 588    tile = sp_get_cached_tile_tex(sp_sview->cache, addr);
 589
 590    return &tile->data.color[y][x][0];
 591 }
 592
 593
 594 static INLINE const float *
 595 get_texel_2d(const struct sp_sampler_view *sp_sview,
 596              const struct sp_sampler *sp_samp,
 597              union tex_tile_address addr, int x, int y)
 598 {
 599    const struct pipe_resource *texture = sp_sview->base.texture;
 600    unsigned level = addr.bits.level;
 601
 602    if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
 603        y < 0 || y >= (int) u_minify(texture->height0, level)) {
 604       return sp_samp->base.border_color.f;
 605    }
 606    else {
 607       return get_texel_2d_no_border( sp_sview, addr, x, y );
 608    }
 609 }
 610
 611
 612 /*
 613  * Here's the complete logic (HOLY CRAP) for finding next face and doing the
 614  * corresponding coord wrapping, implemented by get_next_face,
 615  * get_next_xcoord, get_next_ycoord.
 616  * Read like that (first line):
 617  * If face is +x and s coord is below zero, then
 618  * new face is +z, new s is max , new t is old t
 619  * (max is always cube size - 1).
 620  *
 621  * +x s- -> +z: s = max,   t = t
 622  * +x s+ -> -z: s = 0,     t = t
 623  * +x t- -> +y: s = max,   t = max-s
 624  * +x t+ -> -y: s = max,   t = s
 625  *
 626  * -x s- -> -z: s = max,   t = t
 627  * -x s+ -> +z: s = 0,     t = t
 628  * -x t- -> +y: s = 0,     t = s
 629  * -x t+ -> -y: s = 0,     t = max-s
 630  *
 631  * +y s- -> -x: s = t,     t = 0
 632  * +y s+ -> +x: s = max-t, t = 0
 633  * +y t- -> -z: s = max-s, t = 0
 634  * +y t+ -> +z: s = s,     t = 0
 635  *
 636  * -y s- -> -x: s = max-t, t = max
 637  * -y s+ -> +x: s = t,     t = max
 638  * -y t- -> +z: s = s,     t = max
 639  * -y t+ -> -z: s = max-s, t = max
 640
 641  * +z s- -> -x: s = max,   t = t
 642  * +z s+ -> +x: s = 0,     t = t
 643  * +z t- -> +y: s = s,     t = max
 644  * +z t+ -> -y: s = s,     t = 0
 645
 646  * -z s- -> +x: s = max,   t = t
 647  * -z s+ -> -x: s = 0,     t = t
 648  * -z t- -> +y: s = max-s, t = 0
 649  * -z t+ -> -y: s = max-s, t = max
 650  */
 651
 652
 653 /*
 654  * seamless cubemap neighbour array.
 655  * this array is used to find the adjacent face in each of 4 directions,
 656  * left, right, up, down. (or -x, +x, -y, +y).
 657  */
 658 static const unsigned face_array[PIPE_TEX_FACE_MAX][4] = {
 659    /* pos X first then neg X is Z different, Y the same */
 660    /* PIPE_TEX_FACE_POS_X,*/
 661    { PIPE_TEX_FACE_POS_Z, PIPE_TEX_FACE_NEG_Z,
 662      PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y },
 663    /* PIPE_TEX_FACE_NEG_X */
 664    { PIPE_TEX_FACE_NEG_Z, PIPE_TEX_FACE_POS_Z,
 665      PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y },
 666
 667    /* pos Y first then neg Y is X different, X the same */
 668    /* PIPE_TEX_FACE_POS_Y */
 669    { PIPE_TEX_FACE_NEG_X, PIPE_TEX_FACE_POS_X,
 670      PIPE_TEX_FACE_NEG_Z, PIPE_TEX_FACE_POS_Z },
 671
 672    /* PIPE_TEX_FACE_NEG_Y */
 673    { PIPE_TEX_FACE_NEG_X, PIPE_TEX_FACE_POS_X,
 674      PIPE_TEX_FACE_POS_Z, PIPE_TEX_FACE_NEG_Z },
 675
 676    /* pos Z first then neg Y is X different, X the same */
 677    /* PIPE_TEX_FACE_POS_Z */
 678    { PIPE_TEX_FACE_NEG_X, PIPE_TEX_FACE_POS_X,
 679      PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y },
 680
 681    /* PIPE_TEX_FACE_NEG_Z */
 682    { PIPE_TEX_FACE_POS_X, PIPE_TEX_FACE_NEG_X,
 683      PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y }
 684 };
 685
 686 static INLINE unsigned
 687 get_next_face(unsigned face, int idx)
 688 {
 689    return face_array[face][idx];
 690 }
 691
 692 /*
 693  * return a new xcoord based on old face, old coords, cube size
 694  * and fall_off_index (0 for x-, 1 for x+, 2 for y-, 3 for y+)
 695  */
 696 static INLINE int
 697 get_next_xcoord(unsigned face, unsigned fall_off_index, int max, int xc, int yc)
 698 {
 699    if ((face == 0 && fall_off_index != 1) ||
 700        (face == 1 && fall_off_index == 0) ||
 701        (face == 4 && fall_off_index == 0) ||
 702        (face == 5 && fall_off_index == 0)) {
 703       return max;
 704    }
 705    if ((face == 1 && fall_off_index != 0) ||
 706        (face == 0 && fall_off_index == 1) ||
 707        (face == 4 && fall_off_index == 1) ||
 708        (face == 5 && fall_off_index == 1)) {
 709       return 0;
 710    }
 711    if ((face == 4 && fall_off_index >= 2) ||
 712        (face == 2 && fall_off_index == 3) ||
 713        (face == 3 && fall_off_index == 2)) {
 714       return xc;
 715    }
 716    if ((face == 5 && fall_off_index >= 2) ||
 717        (face == 2 && fall_off_index == 2) ||
 718        (face == 3 && fall_off_index == 3)) {
 719       return max - xc;
 720    }
 721    if ((face == 2 && fall_off_index == 0) ||
 722        (face == 3 && fall_off_index == 1)) {
 723       return yc;
 724    }
 725    /* (face == 2 && fall_off_index == 1) ||
 726       (face == 3 && fall_off_index == 0)) */
 727    return max - yc;
 728 }
 729
 730 /*
 731  * return a new ycoord based on old face, old coords, cube size
 732  * and fall_off_index (0 for x-, 1 for x+, 2 for y-, 3 for y+)
 733  */
 734 static INLINE int
 735 get_next_ycoord(unsigned face, unsigned fall_off_index, int max, int xc, int yc)
 736 {
 737    if ((fall_off_index <= 1) && (face <= 1 || face >= 4)) {
 738       return yc;
 739    }
 740    if (face == 2 ||
 741        (face == 4 && fall_off_index == 3) ||
 742        (face == 5 && fall_off_index == 2)) {
 743       return 0;
 744    }
 745    if (face == 3 ||
 746        (face == 4 && fall_off_index == 2) ||
 747        (face == 5 && fall_off_index == 3)) {
 748       return max;
 749    }
 750    if ((face == 0 && fall_off_index == 3) ||
 751        (face == 1 && fall_off_index == 2)) {
 752       return xc;
 753    }
 754    /* (face == 0 && fall_off_index == 2) ||
 755       (face == 1 && fall_off_index == 3) */
 756    return max - xc;
 757 }
 758
 759
 760 /* Gather a quad of adjacent texels within a tile:
 761  */
 762 static INLINE void
 763 get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_view *sp_sview,
 764                                         union tex_tile_address addr,
 765                                         unsigned x, unsigned y,
 766                                         const float *out[4])
 767 {
 768     const struct softpipe_tex_cached_tile *tile;
 769
 770    addr.bits.x = x / TEX_TILE_SIZE;
 771    addr.bits.y = y / TEX_TILE_SIZE;
 772    y %= TEX_TILE_SIZE;
 773    x %= TEX_TILE_SIZE;
 774
 775    tile = sp_get_cached_tile_tex(sp_sview->cache, addr);
 776
 777    out[0] = &tile->data.color[y  ][x  ][0];
 778    out[1] = &tile->data.color[y  ][x+1][0];
 779    out[2] = &tile->data.color[y+1][x  ][0];
 780    out[3] = &tile->data.color[y+1][x+1][0];
 781 }
 782
 783
 784 /* Gather a quad of potentially non-adjacent texels:
 785  */
 786 static INLINE void
 787 get_texel_quad_2d_no_border(const struct sp_sampler_view *sp_sview,
 788                             union tex_tile_address addr,
 789                             int x0, int y0,
 790                             int x1, int y1,
 791                             const float *out[4])
 792 {
 793    out[0] = get_texel_2d_no_border( sp_sview, addr, x0, y0 );
 794    out[1] = get_texel_2d_no_border( sp_sview, addr, x1, y0 );
 795    out[2] = get_texel_2d_no_border( sp_sview, addr, x0, y1 );
 796    out[3] = get_texel_2d_no_border( sp_sview, addr, x1, y1 );
 797 }
 798
 799 /* Can involve a lot of unnecessary checks for border color:
 800  */
 801 static INLINE void
 802 get_texel_quad_2d(const struct sp_sampler_view *sp_sview,
 803                   const struct sp_sampler *sp_samp,
 804                   union tex_tile_address addr,
 805                   int x0, int y0,
 806                   int x1, int y1,
 807                   const float *out[4])
 808 {
 809    out[0] = get_texel_2d( sp_sview, sp_samp, addr, x0, y0 );
 810    out[1] = get_texel_2d( sp_sview, sp_samp, addr, x1, y0 );
 811    out[3] = get_texel_2d( sp_sview, sp_samp, addr, x1, y1 );
 812    out[2] = get_texel_2d( sp_sview, sp_samp, addr, x0, y1 );
 813 }
 814
 815
 816
 817 /* 3d variants:
 818  */
 819 static INLINE const float *
 820 get_texel_3d_no_border(const struct sp_sampler_view *sp_sview,
 821                        union tex_tile_address addr, int x, int y, int z)
 822 {
 823    const struct softpipe_tex_cached_tile *tile;
 824
 825    addr.bits.x = x / TEX_TILE_SIZE;
 826    addr.bits.y = y / TEX_TILE_SIZE;
 827    addr.bits.z = z;
 828    y %= TEX_TILE_SIZE;
 829    x %= TEX_TILE_SIZE;
 830
 831    tile = sp_get_cached_tile_tex(sp_sview->cache, addr);
 832
 833    return &tile->data.color[y][x][0];
 834 }
 835
 836
 837 static INLINE const float *
 838 get_texel_3d(const struct sp_sampler_view *sp_sview,
 839              const struct sp_sampler *sp_samp,
 840              union tex_tile_address addr, int x, int y, int z)
 841 {
 842    const struct pipe_resource *texture = sp_sview->base.texture;
 843    unsigned level = addr.bits.level;
 844
 845    if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
 846        y < 0 || y >= (int) u_minify(texture->height0, level) ||
 847        z < 0 || z >= (int) u_minify(texture->depth0, level)) {
 848       return sp_samp->base.border_color.f;
 849    }
 850    else {
 851       return get_texel_3d_no_border( sp_sview, addr, x, y, z );
 852    }
 853 }
 854
 855
 856 /* Get texel pointer for 1D array texture */
 857 static INLINE const float *
 858 get_texel_1d_array(const struct sp_sampler_view *sp_sview,
 859                    const struct sp_sampler *sp_samp,
 860                    union tex_tile_address addr, int x, int y)
 861 {
 862    const struct pipe_resource *texture = sp_sview->base.texture;
 863    unsigned level = addr.bits.level;
 864
 865    if (x < 0 || x >= (int) u_minify(texture->width0, level)) {
 866       return sp_samp->base.border_color.f;
 867    }
 868    else {
 869       return get_texel_2d_no_border(sp_sview, addr, x, y);
 870    }
 871 }
 872
 873
 874 /* Get texel pointer for 2D array texture */
 875 static INLINE const float *
 876 get_texel_2d_array(const struct sp_sampler_view *sp_sview,
 877                    const struct sp_sampler *sp_samp,
 878                    union tex_tile_address addr, int x, int y, int layer)
 879 {
 880    const struct pipe_resource *texture = sp_sview->base.texture;
 881    unsigned level = addr.bits.level;
 882
 883    assert(layer < (int) texture->array_size);
 884    assert(layer >= 0);
 885
 886    if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
 887        y < 0 || y >= (int) u_minify(texture->height0, level)) {
 888       return sp_samp->base.border_color.f;
 889    }
 890    else {
 891       return get_texel_3d_no_border(sp_sview, addr, x, y, layer);
 892    }
 893 }
 894
 895
 896 static INLINE const float *
 897 get_texel_cube_seamless(const struct sp_sampler_view *sp_sview,
 898                         union tex_tile_address addr, int x, int y,
 899                         float *corner, int layer, unsigned face)
 900 {
 901    const struct pipe_resource *texture = sp_sview->base.texture;
 902    unsigned level = addr.bits.level;
 903    int new_x, new_y, max_x;
 904
 905    max_x = (int) u_minify(texture->width0, level);
 906
 907    assert(texture->width0 == texture->height0);
 908    new_x = x;
 909    new_y = y;
 910
 911    /* change the face */
 912    if (x < 0) {
 913       /*
 914        * Cheat with corners. They are difficult and I believe because we don't get
 915        * per-pixel faces we can actually have multiple corner texels per pixel,
 916        * which screws things up majorly in any case (as the per spec behavior is
 917        * to average the 3 remaining texels, which we might not have).
 918        * Hence just make sure that the 2nd coord is clamped, will simply pick the
 919        * sample which would have fallen off the x coord, but not y coord.
 920        * So the filter weight of the samples will be wrong, but at least this
 921        * ensures that only valid texels near the corner are used.
 922        */
 923       if (y < 0 || y >= max_x) {
 924          y = CLAMP(y, 0, max_x - 1);
 925       }
 926       new_x = get_next_xcoord(face, 0, max_x -1, x, y);
 927       new_y = get_next_ycoord(face, 0, max_x -1, x, y);
 928       face = get_next_face(face, 0);
 929    } else if (x >= max_x) {
 930       if (y < 0 || y >= max_x) {
 931          y = CLAMP(y, 0, max_x - 1);
 932       }
 933       new_x = get_next_xcoord(face, 1, max_x -1, x, y);
 934       new_y = get_next_ycoord(face, 1, max_x -1, x, y);
 935       face = get_next_face(face, 1);
 936    } else if (y < 0) {
 937       new_x = get_next_xcoord(face, 2, max_x -1, x, y);
 938       new_y = get_next_ycoord(face, 2, max_x -1, x, y);
 939       face = get_next_face(face, 2);
 940    } else if (y >= max_x) {
 941       new_x = get_next_xcoord(face, 3, max_x -1, x, y);
 942       new_y = get_next_ycoord(face, 3, max_x -1, x, y);
 943       face = get_next_face(face, 3);
 944    }
 945
 946    return get_texel_3d_no_border(sp_sview, addr, new_x, new_y, layer + face);
 947 }
 948
 949
 950 /* Get texel pointer for cube array texture */
 951 static INLINE const float *
 952 get_texel_cube_array(const struct sp_sampler_view *sp_sview,
 953                      const struct sp_sampler *sp_samp,
 954                      union tex_tile_address addr, int x, int y, int layer)
 955 {
 956    const struct pipe_resource *texture = sp_sview->base.texture;
 957    unsigned level = addr.bits.level;
 958
 959    assert(layer < (int) texture->array_size);
 960    assert(layer >= 0);
 961
 962    if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
 963        y < 0 || y >= (int) u_minify(texture->height0, level)) {
 964       return sp_samp->base.border_color.f;
 965    }
 966    else {
 967       return get_texel_3d_no_border(sp_sview, addr, x, y, layer);
 968    }
 969 }
 970 /**
 971  * Given the logbase2 of a mipmap's base level size and a mipmap level,
 972  * return the size (in texels) of that mipmap level.
 973  * For example, if level[0].width = 256 then base_pot will be 8.
 974  * If level = 2, then we'll return 64 (the width at level=2).
 975  * Return 1 if level > base_pot.
 976  */
 977 static INLINE unsigned
 978 pot_level_size(unsigned base_pot, unsigned level)
 979 {
 980    return (base_pot >= level) ? (1 << (base_pot - level)) : 1;
 981 }
 982
 983
 984 static void
 985 print_sample(const char *function, const float *rgba)
 986 {
 987    debug_printf("%s %g %g %g %g\n",
 988                 function,
 989                 rgba[0], rgba[TGSI_NUM_CHANNELS], rgba[2*TGSI_NUM_CHANNELS], rgba[3*TGSI_NUM_CHANNELS]);
 990 }
 991
 992
 993 static void
 994 print_sample_4(const char *function, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
 995 {
 996    debug_printf("%s %g %g %g %g, %g %g %g %g, %g %g %g %g, %g %g %g %g\n",
 997                 function,
 998                 rgba[0][0], rgba[1][0], rgba[2][0], rgba[3][0],
 999                 rgba[0][1], rgba[1][1], rgba[2][1], rgba[3][1],
1000                 rgba[0][2], rgba[1][2], rgba[2][2], rgba[3][2],
1001                 rgba[0][3], rgba[1][3], rgba[2][3], rgba[3][3]);
1002 }
1003
1004
1005 /* Some image-filter fastpaths:
1006  */
1007 static INLINE void
1008 img_filter_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview,
1009                                 struct sp_sampler *sp_samp,
1010                                 const struct img_filter_args *args,
1011                                 float *rgba)
1012 {
1013    unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
1014    unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
1015    int xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */
1016    int ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */
1017    union tex_tile_address addr;
1018    int c;
1019
1020    float u = args->s * xpot - 0.5F;
1021    float v = args->t * ypot - 0.5F;
1022
1023    int uflr = util_ifloor(u);
1024    int vflr = util_ifloor(v);
1025
1026    float xw = u - (float)uflr;
1027    float yw = v - (float)vflr;
1028
1029    int x0 = uflr & (xpot - 1);
1030    int y0 = vflr & (ypot - 1);
1031
1032    const float *tx[4];
1033
1034    addr.value = 0;
1035    addr.bits.level = args->level;
1036
1037    /* Can we fetch all four at once:
1038     */
1039    if (x0 < xmax && y0 < ymax) {
1040       get_texel_quad_2d_no_border_single_tile(sp_sview, addr, x0, y0, tx);
1041    }
1042    else {
1043       unsigned x1 = (x0 + 1) & (xpot - 1);
1044       unsigned y1 = (y0 + 1) & (ypot - 1);
1045       get_texel_quad_2d_no_border(sp_sview, addr, x0, y0, x1, y1, tx);
1046    }
1047
1048    /* interpolate R, G, B, A */
1049    for (c = 0; c < TGSI_QUAD_SIZE; c++) {
1050       rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1051                                        tx[0][c], tx[1][c],
1052                                        tx[2][c], tx[3][c]);
1053    }
1054
1055    if (DEBUG_TEX) {
1056       print_sample(__FUNCTION__, rgba);
1057    }
1058 }
1059
1060
1061 static INLINE void
1062 img_filter_2d_nearest_repeat_POT(struct sp_sampler_view *sp_sview,
1063                                  struct sp_sampler *sp_samp,
1064                                  const struct img_filter_args *args,
1065                                  float rgba[TGSI_QUAD_SIZE])
1066 {
1067    unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
1068    unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
1069    const float *out;
1070    union tex_tile_address addr;
1071    int c;
1072
1073    float u = args->s * xpot;
1074    float v = args->t * ypot;
1075
1076    int uflr = util_ifloor(u);
1077    int vflr = util_ifloor(v);
1078
1079    int x0 = uflr & (xpot - 1);
1080    int y0 = vflr & (ypot - 1);
1081
1082    addr.value = 0;
1083    addr.bits.level = args->level;
1084
1085    out = get_texel_2d_no_border(sp_sview, addr, x0, y0);
1086    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1087       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1088
1089    if (DEBUG_TEX) {
1090       print_sample(__FUNCTION__, rgba);
1091    }
1092 }
1093
1094
1095 static INLINE void
1096 img_filter_2d_nearest_clamp_POT(struct sp_sampler_view *sp_sview,
1097                                 struct sp_sampler *sp_samp,
1098                                 const struct img_filter_args *args,
1099                                 float rgba[TGSI_QUAD_SIZE])
1100 {
1101    unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
1102    unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
1103    union tex_tile_address addr;
1104    int c;
1105
1106    float u = args->s * xpot;
1107    float v = args->t * ypot;
1108
1109    int x0, y0;
1110    const float *out;
1111
1112    addr.value = 0;
1113    addr.bits.level = args->level;
1114
1115    x0 = util_ifloor(u);
1116    if (x0 < 0)
1117       x0 = 0;
1118    else if (x0 > (int) xpot - 1)
1119       x0 = xpot - 1;
1120
1121    y0 = util_ifloor(v);
1122    if (y0 < 0)
1123       y0 = 0;
1124    else if (y0 > (int) ypot - 1)
1125       y0 = ypot - 1;
1126
1127    out = get_texel_2d_no_border(sp_sview, addr, x0, y0);
1128    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1129       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1130
1131    if (DEBUG_TEX) {
1132       print_sample(__FUNCTION__, rgba);
1133    }
1134 }
1135
1136
1137 static void
1138 img_filter_1d_nearest(struct sp_sampler_view *sp_sview,
1139                       struct sp_sampler *sp_samp,
1140                       const struct img_filter_args *args,
1141                       float rgba[TGSI_QUAD_SIZE])
1142 {
1143    const struct pipe_resource *texture = sp_sview->base.texture;
1144    int width;
1145    int x;
1146    union tex_tile_address addr;
1147    const float *out;
1148    int c;
1149
1150    width = u_minify(texture->width0, args->level);
1151
1152    assert(width > 0);
1153
1154    addr.value = 0;
1155    addr.bits.level = args->level;
1156
1157    sp_samp->nearest_texcoord_s(args->s, width, &x);
1158
1159    out = get_texel_2d(sp_sview, sp_samp, addr, x, 0);
1160    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1161       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1162
1163    if (DEBUG_TEX) {
1164       print_sample(__FUNCTION__, rgba);
1165    }
1166 }
1167
1168
1169 static void
1170 img_filter_1d_array_nearest(struct sp_sampler_view *sp_sview,
1171                             struct sp_sampler *sp_samp,
1172                             const struct img_filter_args *args,
1173                             float *rgba)
1174 {
1175    const struct pipe_resource *texture = sp_sview->base.texture;
1176    int width;
1177    int x, layer;
1178    union tex_tile_address addr;
1179    const float *out;
1180    int c;
1181
1182    width = u_minify(texture->width0, args->level);
1183
1184    assert(width > 0);
1185
1186    addr.value = 0;
1187    addr.bits.level = args->level;
1188
1189    sp_samp->nearest_texcoord_s(args->s, width, &x);
1190    layer = coord_to_layer(args->t, sp_sview->base.u.tex.first_layer,
1191                           sp_sview->base.u.tex.last_layer);
1192
1193    out = get_texel_1d_array(sp_sview, sp_samp, addr, x, layer);
1194    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1195       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1196
1197    if (DEBUG_TEX) {
1198       print_sample(__FUNCTION__, rgba);
1199    }
1200 }
1201
1202
1203 static void
1204 img_filter_2d_nearest(struct sp_sampler_view *sp_sview,
1205                       struct sp_sampler *sp_samp,
1206                       const struct img_filter_args *args,
1207                       float *rgba)
1208 {
1209    const struct pipe_resource *texture = sp_sview->base.texture;
1210    int width, height;
1211    int x, y;
1212    union tex_tile_address addr;
1213    const float *out;
1214    int c;
1215
1216    width = u_minify(texture->width0, args->level);
1217    height = u_minify(texture->height0, args->level);
1218
1219    assert(width > 0);
1220    assert(height > 0);
1221
1222    addr.value = 0;
1223    addr.bits.level = args->level;
1224
1225    sp_samp->nearest_texcoord_s(args->s, width, &x);
1226    sp_samp->nearest_texcoord_t(args->t, height, &y);
1227
1228    out = get_texel_2d(sp_sview, sp_samp, addr, x, y);
1229    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1230       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1231
1232    if (DEBUG_TEX) {
1233       print_sample(__FUNCTION__, rgba);
1234    }
1235 }
1236
1237
1238 static void
1239 img_filter_2d_array_nearest(struct sp_sampler_view *sp_sview,
1240                             struct sp_sampler *sp_samp,
1241                             const struct img_filter_args *args,
1242                             float *rgba)
1243 {
1244    const struct pipe_resource *texture = sp_sview->base.texture;
1245    int width, height;
1246    int x, y, layer;
1247    union tex_tile_address addr;
1248    const float *out;
1249    int c;
1250
1251    width = u_minify(texture->width0, args->level);
1252    height = u_minify(texture->height0, args->level);
1253
1254    assert(width > 0);
1255    assert(height > 0);
1256
1257    addr.value = 0;
1258    addr.bits.level = args->level;
1259
1260    sp_samp->nearest_texcoord_s(args->s, width, &x);
1261    sp_samp->nearest_texcoord_t(args->t, height, &y);
1262    layer = coord_to_layer(args->p, sp_sview->base.u.tex.first_layer,
1263                           sp_sview->base.u.tex.last_layer);
1264
1265    out = get_texel_2d_array(sp_sview, sp_samp, addr, x, y, layer);
1266    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1267       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1268
1269    if (DEBUG_TEX) {
1270       print_sample(__FUNCTION__, rgba);
1271    }
1272 }
1273
1274
1275 static void
1276 img_filter_cube_nearest(struct sp_sampler_view *sp_sview,
1277                         struct sp_sampler *sp_samp,
1278                         const struct img_filter_args *args,
1279                         float *rgba)
1280 {
1281    const struct pipe_resource *texture = sp_sview->base.texture;
1282    int width, height;
1283    int x, y, layerface;
1284    union tex_tile_address addr;
1285    const float *out;
1286    int c;
1287
1288    width = u_minify(texture->width0, args->level);
1289    height = u_minify(texture->height0, args->level);
1290
1291    assert(width > 0);
1292    assert(height > 0);
1293
1294    addr.value = 0;
1295    addr.bits.level = args->level;
1296
1297    /*
1298     * If NEAREST filtering is done within a miplevel, always apply wrap
1299     * mode CLAMP_TO_EDGE.
1300     */
1301    if (sp_samp->base.seamless_cube_map) {
1302       wrap_nearest_clamp_to_edge(args->s, width, &x);
1303       wrap_nearest_clamp_to_edge(args->t, height, &y);
1304    } else {
1305       /* Would probably make sense to ignore mode and just do edge clamp */
1306       sp_samp->nearest_texcoord_s(args->s, width, &x);
1307       sp_samp->nearest_texcoord_t(args->t, height, &y);
1308    }
1309
1310    layerface = args->face_id + sp_sview->base.u.tex.first_layer;
1311    out = get_texel_cube_array(sp_sview, sp_samp, addr, x, y, layerface);
1312    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1313       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1314
1315    if (DEBUG_TEX) {
1316       print_sample(__FUNCTION__, rgba);
1317    }
1318 }
1319
1320 static void
1321 img_filter_cube_array_nearest(struct sp_sampler_view *sp_sview,
1322                               struct sp_sampler *sp_samp,
1323                               const struct img_filter_args *args,
1324                               float *rgba)
1325 {
1326    const struct pipe_resource *texture = sp_sview->base.texture;
1327    int width, height;
1328    int x, y, layerface;
1329    union tex_tile_address addr;
1330    const float *out;
1331    int c;
1332
1333    width = u_minify(texture->width0, args->level);
1334    height = u_minify(texture->height0, args->level);
1335
1336    assert(width > 0);
1337    assert(height > 0);
1338
1339    addr.value = 0;
1340    addr.bits.level = args->level;
1341
1342    sp_samp->nearest_texcoord_s(args->s, width, &x);
1343    sp_samp->nearest_texcoord_t(args->t, height, &y);
1344    layerface = coord_to_layer(6 * args->p + sp_sview->base.u.tex.first_layer,
1345                               sp_sview->base.u.tex.first_layer,
1346                               sp_sview->base.u.tex.last_layer - 5) + args->face_id;
1347
1348    out = get_texel_cube_array(sp_sview, sp_samp, addr, x, y, layerface);
1349    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1350       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1351
1352    if (DEBUG_TEX) {
1353       print_sample(__FUNCTION__, rgba);
1354    }
1355 }
1356
1357 static void
1358 img_filter_3d_nearest(struct sp_sampler_view *sp_sview,
1359                       struct sp_sampler *sp_samp,
1360                       const struct img_filter_args *args,
1361                       float *rgba)
1362 {
1363    const struct pipe_resource *texture = sp_sview->base.texture;
1364    int width, height, depth;
1365    int x, y, z;
1366    union tex_tile_address addr;
1367    const float *out;
1368    int c;
1369
1370    width = u_minify(texture->width0, args->level);
1371    height = u_minify(texture->height0, args->level);
1372    depth = u_minify(texture->depth0, args->level);
1373
1374    assert(width > 0);
1375    assert(height > 0);
1376    assert(depth > 0);
1377
1378    sp_samp->nearest_texcoord_s(args->s, width,  &x);
1379    sp_samp->nearest_texcoord_t(args->t, height, &y);
1380    sp_samp->nearest_texcoord_p(args->p, depth,  &z);
1381
1382    addr.value = 0;
1383    addr.bits.level = args->level;
1384
1385    out = get_texel_3d(sp_sview, sp_samp, addr, x, y, z);
1386    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1387       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1388 }
1389
1390
1391 static void
1392 img_filter_1d_linear(struct sp_sampler_view *sp_sview,
1393                      struct sp_sampler *sp_samp,
1394                      const struct img_filter_args *args,
1395                      float *rgba)
1396 {
1397    const struct pipe_resource *texture = sp_sview->base.texture;
1398    int width;
1399    int x0, x1;
1400    float xw; /* weights */
1401    union tex_tile_address addr;
1402    const float *tx0, *tx1;
1403    int c;
1404
1405    width = u_minify(texture->width0, args->level);
1406
1407    assert(width > 0);
1408
1409    addr.value = 0;
1410    addr.bits.level = args->level;
1411
1412    sp_samp->linear_texcoord_s(args->s, width, &x0, &x1, &xw);
1413
1414    tx0 = get_texel_2d(sp_sview, sp_samp, addr, x0, 0);
1415    tx1 = get_texel_2d(sp_sview, sp_samp, addr, x1, 0);
1416
1417    /* interpolate R, G, B, A */
1418    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1419       rgba[TGSI_NUM_CHANNELS*c] = lerp(xw, tx0[c], tx1[c]);
1420 }
1421
1422
1423 static void
1424 img_filter_1d_array_linear(struct sp_sampler_view *sp_sview,
1425                            struct sp_sampler *sp_samp,
1426                            const struct img_filter_args *args,
1427                            float *rgba)
1428 {
1429    const struct pipe_resource *texture = sp_sview->base.texture;
1430    int width;
1431    int x0, x1, layer;
1432    float xw; /* weights */
1433    union tex_tile_address addr;
1434    const float *tx0, *tx1;
1435    int c;
1436
1437    width = u_minify(texture->width0, args->level);
1438
1439    assert(width > 0);
1440
1441    addr.value = 0;
1442    addr.bits.level = args->level;
1443
1444    sp_samp->linear_texcoord_s(args->s, width, &x0, &x1, &xw);
1445    layer = coord_to_layer(args->t, sp_sview->base.u.tex.first_layer,
1446                           sp_sview->base.u.tex.last_layer);
1447
1448    tx0 = get_texel_1d_array(sp_sview, sp_samp, addr, x0, layer);
1449    tx1 = get_texel_1d_array(sp_sview, sp_samp, addr, x1, layer);
1450
1451    /* interpolate R, G, B, A */
1452    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1453       rgba[TGSI_NUM_CHANNELS*c] = lerp(xw, tx0[c], tx1[c]);
1454 }
1455
1456
1457 static void
1458 img_filter_2d_linear(struct sp_sampler_view *sp_sview,
1459                      struct sp_sampler *sp_samp,
1460                      const struct img_filter_args *args,
1461                      float *rgba)
1462 {
1463    const struct pipe_resource *texture = sp_sview->base.texture;
1464    int width, height;
1465    int x0, y0, x1, y1;
1466    float xw, yw; /* weights */
1467    union tex_tile_address addr;
1468    const float *tx0, *tx1, *tx2, *tx3;
1469    int c;
1470
1471    width = u_minify(texture->width0, args->level);
1472    height = u_minify(texture->height0, args->level);
1473
1474    assert(width > 0);
1475    assert(height > 0);
1476
1477    addr.value = 0;
1478    addr.bits.level = args->level;
1479
1480    sp_samp->linear_texcoord_s(args->s, width,  &x0, &x1, &xw);
1481    sp_samp->linear_texcoord_t(args->t, height, &y0, &y1, &yw);
1482
1483    tx0 = get_texel_2d(sp_sview, sp_samp, addr, x0, y0);
1484    tx1 = get_texel_2d(sp_sview, sp_samp, addr, x1, y0);
1485    tx2 = get_texel_2d(sp_sview, sp_samp, addr, x0, y1);
1486    tx3 = get_texel_2d(sp_sview, sp_samp, addr, x1, y1);
1487
1488    /* interpolate R, G, B, A */
1489    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1490       rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1491                                           tx0[c], tx1[c],
1492                                           tx2[c], tx3[c]);
1493 }
1494
1495
1496 static void
1497 img_filter_2d_array_linear(struct sp_sampler_view *sp_sview,
1498                            struct sp_sampler *sp_samp,
1499                            const struct img_filter_args *args,
1500                            float *rgba)
1501 {
1502    const struct pipe_resource *texture = sp_sview->base.texture;
1503    int width, height;
1504    int x0, y0, x1, y1, layer;
1505    float xw, yw; /* weights */
1506    union tex_tile_address addr;
1507    const float *tx0, *tx1, *tx2, *tx3;
1508    int c;
1509
1510    width = u_minify(texture->width0, args->level);
1511    height = u_minify(texture->height0, args->level);
1512
1513    assert(width > 0);
1514    assert(height > 0);
1515
1516    addr.value = 0;
1517    addr.bits.level = args->level;
1518
1519    sp_samp->linear_texcoord_s(args->s, width,  &x0, &x1, &xw);
1520    sp_samp->linear_texcoord_t(args->t, height, &y0, &y1, &yw);
1521    layer = coord_to_layer(args->p, sp_sview->base.u.tex.first_layer,
1522                           sp_sview->base.u.tex.last_layer);
1523
1524    tx0 = get_texel_2d_array(sp_sview, sp_samp, addr, x0, y0, layer);
1525    tx1 = get_texel_2d_array(sp_sview, sp_samp, addr, x1, y0, layer);
1526    tx2 = get_texel_2d_array(sp_sview, sp_samp, addr, x0, y1, layer);
1527    tx3 = get_texel_2d_array(sp_sview, sp_samp, addr, x1, y1, layer);
1528
1529    /* interpolate R, G, B, A */
1530    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1531       rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1532                                           tx0[c], tx1[c],
1533                                           tx2[c], tx3[c]);
1534 }
1535
1536
1537 static void
1538 img_filter_cube_linear(struct sp_sampler_view *sp_sview,
1539                        struct sp_sampler *sp_samp,
1540                        const struct img_filter_args *args,
1541                        float *rgba)
1542 {
1543    const struct pipe_resource *texture = sp_sview->base.texture;
1544    int width, height;
1545    int x0, y0, x1, y1, layer;
1546    float xw, yw; /* weights */
1547    union tex_tile_address addr;
1548    const float *tx0, *tx1, *tx2, *tx3;
1549    float corner0[TGSI_QUAD_SIZE], corner1[TGSI_QUAD_SIZE],
1550          corner2[TGSI_QUAD_SIZE], corner3[TGSI_QUAD_SIZE];
1551    int c;
1552
1553    width = u_minify(texture->width0, args->level);
1554    height = u_minify(texture->height0, args->level);
1555
1556    assert(width > 0);
1557    assert(height > 0);
1558
1559    addr.value = 0;
1560    addr.bits.level = args->level;
1561
1562    /*
1563     * For seamless if LINEAR filtering is done within a miplevel,
1564     * always apply wrap mode CLAMP_TO_BORDER.
1565     */
1566    if (sp_samp->base.seamless_cube_map) {
1567       /* Note this is a bit overkill, actual clamping is not required */
1568       wrap_linear_clamp_to_border(args->s, width, &x0, &x1, &xw);
1569       wrap_linear_clamp_to_border(args->t, height, &y0, &y1, &yw);
1570    } else {
1571       /* Would probably make sense to ignore mode and just do edge clamp */
1572       sp_samp->linear_texcoord_s(args->s, width,  &x0, &x1, &xw);
1573       sp_samp->linear_texcoord_t(args->t, height, &y0, &y1, &yw);
1574    }
1575
1576    layer = sp_sview->base.u.tex.first_layer;
1577
1578    if (sp_samp->base.seamless_cube_map) {
1579       tx0 = get_texel_cube_seamless(sp_sview, addr, x0, y0, corner0, layer, args->face_id);
1580       tx1 = get_texel_cube_seamless(sp_sview, addr, x1, y0, corner1, layer, args->face_id);
1581       tx2 = get_texel_cube_seamless(sp_sview, addr, x0, y1, corner2, layer, args->face_id);
1582       tx3 = get_texel_cube_seamless(sp_sview, addr, x1, y1, corner3, layer, args->face_id);
1583    } else {
1584       tx0 = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y0, layer + args->face_id);
1585       tx1 = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y0, layer + args->face_id);
1586       tx2 = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y1, layer + args->face_id);
1587       tx3 = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer + args->face_id);
1588    }
1589
1590    /* interpolate R, G, B, A */
1591    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1592       rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1593                                           tx0[c], tx1[c],
1594                                           tx2[c], tx3[c]);
1595 }
1596
1597
1598 static void
1599 img_filter_cube_array_linear(struct sp_sampler_view *sp_sview,
1600                              struct sp_sampler *sp_samp,
1601                              const struct img_filter_args *args,
1602                              float *rgba)
1603 {
1604    const struct pipe_resource *texture = sp_sview->base.texture;
1605    int width, height;
1606    int x0, y0, x1, y1, layer;
1607    float xw, yw; /* weights */
1608    union tex_tile_address addr;
1609    const float *tx0, *tx1, *tx2, *tx3;
1610    float corner0[TGSI_QUAD_SIZE], corner1[TGSI_QUAD_SIZE],
1611          corner2[TGSI_QUAD_SIZE], corner3[TGSI_QUAD_SIZE];
1612    int c;
1613
1614    width = u_minify(texture->width0, args->level);
1615    height = u_minify(texture->height0, args->level);
1616
1617    assert(width > 0);
1618    assert(height > 0);
1619
1620    addr.value = 0;
1621    addr.bits.level = args->level;
1622
1623    /*
1624     * For seamless if LINEAR filtering is done within a miplevel,
1625     * always apply wrap mode CLAMP_TO_BORDER.
1626     */
1627    if (sp_samp->base.seamless_cube_map) {
1628       /* Note this is a bit overkill, actual clamping is not required */
1629       wrap_linear_clamp_to_border(args->s, width, &x0, &x1, &xw);
1630       wrap_linear_clamp_to_border(args->t, height, &y0, &y1, &yw);
1631    } else {
1632       /* Would probably make sense to ignore mode and just do edge clamp */
1633       sp_samp->linear_texcoord_s(args->s, width,  &x0, &x1, &xw);
1634       sp_samp->linear_texcoord_t(args->t, height, &y0, &y1, &yw);
1635    }
1636
1637    layer = coord_to_layer(6 * args->p + sp_sview->base.u.tex.first_layer,
1638                           sp_sview->base.u.tex.first_layer,
1639                           sp_sview->base.u.tex.last_layer - 5);
1640
1641    if (sp_samp->base.seamless_cube_map) {
1642       tx0 = get_texel_cube_seamless(sp_sview, addr, x0, y0, corner0, layer, args->face_id);
1643       tx1 = get_texel_cube_seamless(sp_sview, addr, x1, y0, corner1, layer, args->face_id);
1644       tx2 = get_texel_cube_seamless(sp_sview, addr, x0, y1, corner2, layer, args->face_id);
1645       tx3 = get_texel_cube_seamless(sp_sview, addr, x1, y1, corner3, layer, args->face_id);
1646    } else {
1647       tx0 = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y0, layer + args->face_id);
1648       tx1 = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y0, layer + args->face_id);
1649       tx2 = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y1, layer + args->face_id);
1650       tx3 = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer + args->face_id);
1651    }
1652
1653    /* interpolate R, G, B, A */
1654    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1655       rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1656                                           tx0[c], tx1[c],
1657                                           tx2[c], tx3[c]);
1658 }
1659
1660 static void
1661 img_filter_3d_linear(struct sp_sampler_view *sp_sview,
1662                      struct sp_sampler *sp_samp,
1663                      const struct img_filter_args *args,
1664                      float *rgba)
1665 {
1666    const struct pipe_resource *texture = sp_sview->base.texture;
1667    int width, height, depth;
1668    int x0, x1, y0, y1, z0, z1;
1669    float xw, yw, zw; /* interpolation weights */
1670    union tex_tile_address addr;
1671    const float *tx00, *tx01, *tx02, *tx03, *tx10, *tx11, *tx12, *tx13;
1672    int c;
1673
1674    width = u_minify(texture->width0, args->level);
1675    height = u_minify(texture->height0, args->level);
1676    depth = u_minify(texture->depth0, args->level);
1677
1678    addr.value = 0;
1679    addr.bits.level = args->level;
1680
1681    assert(width > 0);
1682    assert(height > 0);
1683    assert(depth > 0);
1684
1685    sp_samp->linear_texcoord_s(args->s, width,  &x0, &x1, &xw);
1686    sp_samp->linear_texcoord_t(args->t, height, &y0, &y1, &yw);
1687    sp_samp->linear_texcoord_p(args->p, depth,  &z0, &z1, &zw);
1688
1689    tx00 = get_texel_3d(sp_sview, sp_samp, addr, x0, y0, z0);
1690    tx01 = get_texel_3d(sp_sview, sp_samp, addr, x1, y0, z0);
1691    tx02 = get_texel_3d(sp_sview, sp_samp, addr, x0, y1, z0);
1692    tx03 = get_texel_3d(sp_sview, sp_samp, addr, x1, y1, z0);
1693
1694    tx10 = get_texel_3d(sp_sview, sp_samp, addr, x0, y0, z1);
1695    tx11 = get_texel_3d(sp_sview, sp_samp, addr, x1, y0, z1);
1696    tx12 = get_texel_3d(sp_sview, sp_samp, addr, x0, y1, z1);
1697    tx13 = get_texel_3d(sp_sview, sp_samp, addr, x1, y1, z1);
1698
1699       /* interpolate R, G, B, A */
1700    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1701       rgba[TGSI_NUM_CHANNELS*c] =  lerp_3d(xw, yw, zw,
1702                                            tx00[c], tx01[c],
1703                                            tx02[c], tx03[c],
1704                                            tx10[c], tx11[c],
1705                                            tx12[c], tx13[c]);
1706 }
1707
1708
1709 /* Calculate level of detail for every fragment,
1710  * with lambda already computed.
1711  * Note that lambda has already been biased by global LOD bias.
1712  * \param biased_lambda per-quad lambda.
1713  * \param lod_in per-fragment lod_bias or explicit_lod.
1714  * \param lod returns the per-fragment lod.
1715  */
1716 static INLINE void
1717 compute_lod(const struct pipe_sampler_state *sampler,
1718             enum tgsi_sampler_control control,
1719             const float biased_lambda,
1720             const float lod_in[TGSI_QUAD_SIZE],
1721             float lod[TGSI_QUAD_SIZE])
1722 {
1723    float min_lod = sampler->min_lod;
1724    float max_lod = sampler->max_lod;
1725    uint i;
1726
1727    switch (control) {
1728    case tgsi_sampler_lod_none:
1729    case tgsi_sampler_lod_zero:
1730    /* XXX FIXME */
1731    case tgsi_sampler_derivs_explicit:
1732       lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(biased_lambda, min_lod, max_lod);
1733       break;
1734    case tgsi_sampler_lod_bias:
1735       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1736          lod[i] = biased_lambda + lod_in[i];
1737          lod[i] = CLAMP(lod[i], min_lod, max_lod);
1738       }
1739       break;
1740    case tgsi_sampler_lod_explicit:
1741       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1742          lod[i] = CLAMP(lod_in[i], min_lod, max_lod);
1743       }
1744       break;
1745    default:
1746       assert(0);
1747       lod[0] = lod[1] = lod[2] = lod[3] = 0.0f;
1748    }
1749 }
1750
1751
1752 /* Calculate level of detail for every fragment.
1753  * \param lod_in per-fragment lod_bias or explicit_lod.
1754  * \param lod results per-fragment lod.
1755  */
1756 static INLINE void
1757 compute_lambda_lod(struct sp_sampler_view *sp_sview,
1758                    struct sp_sampler *sp_samp,
1759                    const float s[TGSI_QUAD_SIZE],
1760                    const float t[TGSI_QUAD_SIZE],
1761                    const float p[TGSI_QUAD_SIZE],
1762                    const float lod_in[TGSI_QUAD_SIZE],
1763                    enum tgsi_sampler_control control,
1764                    float lod[TGSI_QUAD_SIZE])
1765 {
1766    const struct pipe_sampler_state *sampler = &sp_samp->base;
1767    float lod_bias = sampler->lod_bias;
1768    float min_lod = sampler->min_lod;
1769    float max_lod = sampler->max_lod;
1770    float lambda;
1771    uint i;
1772
1773    switch (control) {
1774    case tgsi_sampler_lod_none:
1775       /* XXX FIXME */
1776    case tgsi_sampler_derivs_explicit:
1777       lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias;
1778       lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lambda, min_lod, max_lod);
1779       break;
1780    case tgsi_sampler_lod_bias:
1781       lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias;
1782       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1783          lod[i] = lambda + lod_in[i];
1784          lod[i] = CLAMP(lod[i], min_lod, max_lod);
1785       }
1786       break;
1787    case tgsi_sampler_lod_explicit:
1788       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1789          lod[i] = CLAMP(lod_in[i], min_lod, max_lod);
1790       }
1791       break;
1792    case tgsi_sampler_lod_zero:
1793       /* this is all static state in the sampler really need clamp here? */
1794       lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lod_bias, min_lod, max_lod);
1795       break;
1796    default:
1797       assert(0);
1798       lod[0] = lod[1] = lod[2] = lod[3] = 0.0f;
1799    }
1800 }
1801
1802
1803 static void
1804 mip_filter_linear(struct sp_sampler_view *sp_sview,
1805                   struct sp_sampler *sp_samp,
1806                   img_filter_func min_filter,
1807                   img_filter_func mag_filter,
1808                   const float s[TGSI_QUAD_SIZE],
1809                   const float t[TGSI_QUAD_SIZE],
1810                   const float p[TGSI_QUAD_SIZE],
1811                   const float c0[TGSI_QUAD_SIZE],
1812                   const float lod_in[TGSI_QUAD_SIZE],
1813                   enum tgsi_sampler_control control,
1814                   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1815 {
1816    const struct pipe_sampler_view *psview = &sp_sview->base;
1817    int j;
1818    float lod[TGSI_QUAD_SIZE];
1819    struct img_filter_args args;
1820
1821    compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, control, lod);
1822
1823    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1824       int level0 = psview->u.tex.first_level + (int)lod[j];
1825
1826       args.s = s[j];
1827       args.t = t[j];
1828       args.p = p[j];
1829       args.face_id = sp_sview->faces[j];
1830
1831       if (lod[j] < 0.0) {
1832          args.level = psview->u.tex.first_level;
1833          mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
1834       }
1835       else if (level0 >= (int) psview->u.tex.last_level) {
1836          args.level = psview->u.tex.last_level;
1837          min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
1838       }
1839       else {
1840          float levelBlend = frac(lod[j]);
1841          float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
1842          int c;
1843
1844          args.level = level0;
1845          min_filter(sp_sview, sp_samp, &args, &rgbax[0][0]);
1846          args.level = level0+1;
1847          min_filter(sp_sview, sp_samp, &args, &rgbax[0][1]);
1848
1849          for (c = 0; c < 4; c++) {
1850             rgba[c][j] = lerp(levelBlend, rgbax[c][0], rgbax[c][1]);
1851          }
1852       }
1853    }
1854
1855    if (DEBUG_TEX) {
1856       print_sample_4(__FUNCTION__, rgba);
1857    }
1858 }
1859
1860
1861 /**
1862  * Compute nearest mipmap level from texcoords.
1863  * Then sample the texture level for four elements of a quad.
1864  * \param c0  the LOD bias factors, or absolute LODs (depending on control)
1865  */
1866 static void
1867 mip_filter_nearest(struct sp_sampler_view *sp_sview,
1868                    struct sp_sampler *sp_samp,
1869                    img_filter_func min_filter,
1870                    img_filter_func mag_filter,
1871                    const float s[TGSI_QUAD_SIZE],
1872                    const float t[TGSI_QUAD_SIZE],
1873                    const float p[TGSI_QUAD_SIZE],
1874                    const float c0[TGSI_QUAD_SIZE],
1875                    const float lod_in[TGSI_QUAD_SIZE],
1876                    enum tgsi_sampler_control control,
1877                    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1878 {
1879    const struct pipe_sampler_view *psview = &sp_sview->base;
1880    float lod[TGSI_QUAD_SIZE];
1881    int j;
1882    struct img_filter_args args;
1883    compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, control, lod);
1884
1885    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1886       args.s = s[j];
1887       args.t = t[j];
1888       args.p = p[j];
1889       args.face_id = sp_sview->faces[j];
1890
1891       if (lod[j] < 0.0) {
1892          args.level = psview->u.tex.first_level;
1893          mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
1894       } else {
1895          int level = psview->u.tex.first_level + (int)(lod[j] + 0.5F);
1896          args.level = MIN2(level, (int)psview->u.tex.last_level);
1897          min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
1898       }
1899    }
1900
1901    if (DEBUG_TEX) {
1902       print_sample_4(__FUNCTION__, rgba);
1903    }
1904 }
1905
1906
1907 static void
1908 mip_filter_none(struct sp_sampler_view *sp_sview,
1909                 struct sp_sampler *sp_samp,
1910                 img_filter_func min_filter,
1911                 img_filter_func mag_filter,
1912                 const float s[TGSI_QUAD_SIZE],
1913                 const float t[TGSI_QUAD_SIZE],
1914                 const float p[TGSI_QUAD_SIZE],
1915                 const float c0[TGSI_QUAD_SIZE],
1916                 const float lod_in[TGSI_QUAD_SIZE],
1917                 enum tgsi_sampler_control control,
1918                 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1919 {
1920    float lod[TGSI_QUAD_SIZE];
1921    int j;
1922    struct img_filter_args args;
1923
1924    args.level = sp_sview->base.u.tex.first_level;
1925    compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, control, lod);
1926
1927    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1928       args.s = s[j];
1929       args.t = t[j];
1930       args.p = p[j];
1931       args.face_id = sp_sview->faces[j];
1932       if (lod[j] < 0.0) {
1933          mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
1934       }
1935       else {
1936          min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
1937       }
1938    }
1939 }
1940
1941
1942 static void
1943 mip_filter_none_no_filter_select(struct sp_sampler_view *sp_sview,
1944                                  struct sp_sampler *sp_samp,
1945                                  img_filter_func min_filter,
1946                                  img_filter_func mag_filter,
1947                                  const float s[TGSI_QUAD_SIZE],
1948                                  const float t[TGSI_QUAD_SIZE],
1949                                  const float p[TGSI_QUAD_SIZE],
1950                                  const float c0[TGSI_QUAD_SIZE],
1951                                  const float lod_in[TGSI_QUAD_SIZE],
1952                                  enum tgsi_sampler_control control,
1953                                  float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1954 {
1955    int j;
1956    struct img_filter_args args;
1957    args.level = sp_sview->base.u.tex.first_level;
1958    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1959       args.s = s[j];
1960       args.t = t[j];
1961       args.p = p[j];
1962       args.face_id = sp_sview->faces[j];
1963       mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
1964    }
1965 }
1966
1967
/* For anisotropic filtering */
#define WEIGHT_LUT_SIZE 1024

/* Weight table for EWA sampling; NULL until create_filter_table() succeeds. */
static float *weightLut = NULL;

/**
 * Creates the look-up table used to speed-up EWA sampling.
 *
 * Entry i holds exp(-alpha * i / (WEIGHT_LUT_SIZE - 1)) with alpha = 2.
 * The table is built once; later calls return immediately.  If the
 * allocation fails, weightLut is left NULL so that a later call can retry.
 */
static void
create_filter_table(void)
{
   const float alpha = 2;
   float *table;
   unsigned i;

   if (weightLut)
      return;

   table = (float *) MALLOC(WEIGHT_LUT_SIZE * sizeof(float));
   if (!table)
      return;   /* out of memory: do not write through a NULL pointer */

   for (i = 0; i < WEIGHT_LUT_SIZE; ++i) {
      const float r2 = (float) i / (float) (WEIGHT_LUT_SIZE - 1);
      table[i] = (float) exp(-alpha * r2);
   }

   /* publish only once the table is completely filled in */
   weightLut = table;
}
1991
1992
1993 /**
1994  * Elliptical weighted average (EWA) filter for producing high quality
1995  * anisotropic filtered results.
1996  * Based on the Higher Quality Elliptical Weighted Average Filter
1997  * published by Paul S. Heckbert in his Master's Thesis
1998  * "Fundamentals of Texture Mapping and Image Warping" (1989)
1999  */
2000 static void
2001 img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
2002                   struct sp_sampler *sp_samp,
2003                   img_filter_func min_filter,
2004                   img_filter_func mag_filter,
2005                   const float s[TGSI_QUAD_SIZE],
2006                   const float t[TGSI_QUAD_SIZE],
2007                   const float p[TGSI_QUAD_SIZE],
2008                   unsigned level,
2009                   const float dudx, const float dvdx,
2010                   const float dudy, const float dvdy,
2011                   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2012 {
2013    const struct pipe_resource *texture = sp_sview->base.texture;
2014
2015    // ??? Won't the image filters blow up if level is negative?
2016    unsigned level0 = level > 0 ? level : 0;
2017    float scaling = 1.0f / (1 << level0);
2018    int width = u_minify(texture->width0, level0);
2019    int height = u_minify(texture->height0, level0);
2020    struct img_filter_args args;
2021    float ux = dudx * scaling;
2022    float vx = dvdx * scaling;
2023    float uy = dudy * scaling;
2024    float vy = dvdy * scaling;
2025
2026    /* compute ellipse coefficients to bound the region:
2027     * A*x*x + B*x*y + C*y*y = F.
2028     */
2029    float A = vx*vx+vy*vy+1;
2030    float B = -2*(ux*vx+uy*vy);
2031    float C = ux*ux+uy*uy+1;
2032    float F = A*C-B*B/4.0f;
2033
2034    /* check if it is an ellipse */
2035    /* assert(F > 0.0); */
2036
2037    /* Compute the ellipse's (u,v) bounding box in texture space */
2038    float d = -B*B+4.0f*C*A;
2039    float box_u = 2.0f / d * sqrtf(d*C*F); /* box_u -> half of bbox with   */
2040    float box_v = 2.0f / d * sqrtf(A*d*F); /* box_v -> half of bbox height */
2041
2042    float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2043    float s_buffer[TGSI_QUAD_SIZE];
2044    float t_buffer[TGSI_QUAD_SIZE];
2045    float weight_buffer[TGSI_QUAD_SIZE];
2046    unsigned buffer_next;
2047    int j;
2048    float den; /* = 0.0F; */
2049    float ddq;
2050    float U; /* = u0 - tex_u; */
2051    int v;
2052
2053    /* Scale ellipse formula to directly index the Filter Lookup Table.
2054     * i.e. scale so that F = WEIGHT_LUT_SIZE-1
2055     */
2056    double formScale = (double) (WEIGHT_LUT_SIZE - 1) / F;
2057    A *= formScale;
2058    B *= formScale;
2059    C *= formScale;
2060    /* F *= formScale; */ /* no need to scale F as we don't use it below here */
2061
2062    /* For each quad, the du and dx values are the same and so the ellipse is
2063     * also the same. Note that texel/image access can only be performed using
2064     * a quad, i.e. it is not possible to get the pixel value for a single
2065     * tex coord. In order to have a better performance, the access is buffered
2066     * using the s_buffer/t_buffer and weight_buffer. Only when the buffer is
2067     * full, then the pixel values are read from the image.
2068     */
2069    ddq = 2 * A;
2070
2071    args.level = level;
2072    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2073       /* Heckbert MS thesis, p. 59; scan over the bounding box of the ellipse
2074        * and incrementally update the value of Ax^2+Bxy*Cy^2; when this
2075        * value, q, is less than F, we're inside the ellipse
2076        */
2077       float tex_u = -0.5F + s[j] * texture->width0 * scaling;
2078       float tex_v = -0.5F + t[j] * texture->height0 * scaling;
2079
2080       int u0 = (int) floorf(tex_u - box_u);
2081       int u1 = (int) ceilf(tex_u + box_u);
2082       int v0 = (int) floorf(tex_v - box_v);
2083       int v1 = (int) ceilf(tex_v + box_v);
2084
2085       float num[4] = {0.0F, 0.0F, 0.0F, 0.0F};
2086       buffer_next = 0;
2087       den = 0;
2088       args.face_id = sp_sview->faces[j];
2089
2090       U = u0 - tex_u;
2091       for (v = v0; v <= v1; ++v) {
2092          float V = v - tex_v;
2093          float dq = A * (2 * U + 1) + B * V;
2094          float q = (C * V + B * U) * V + A * U * U;
2095
2096          int u;
2097          for (u = u0; u <= u1; ++u) {
2098             /* Note that the ellipse has been pre-scaled so F =
2099              * WEIGHT_LUT_SIZE - 1
2100              */
2101             if (q < WEIGHT_LUT_SIZE) {
2102                /* as a LUT is used, q must never be negative;
2103                 * should not happen, though
2104                 */
2105                const int qClamped = q >= 0.0F ? q : 0;
2106                float weight = weightLut[qClamped];
2107
2108                weight_buffer[buffer_next] = weight;
2109                s_buffer[buffer_next] = u / ((float) width);
2110                t_buffer[buffer_next] = v / ((float) height);
2111
2112                buffer_next++;
2113                if (buffer_next == TGSI_QUAD_SIZE) {
2114                   /* 4 texel coords are in the buffer -> read it now */
2115                   unsigned jj;
2116                   /* it is assumed that samp->min_img_filter is set to
2117                    * img_filter_2d_nearest or one of the
2118                    * accelerated img_filter_2d_nearest_XXX functions.
2119                    */
2120                   for (jj = 0; jj < buffer_next; jj++) {
2121                      args.s = s_buffer[jj];
2122                      args.t = t_buffer[jj];
2123                      args.p = p[jj];
2124                      min_filter(sp_sview, sp_samp, &args, &rgba_temp[0][jj]);
2125                      num[0] += weight_buffer[jj] * rgba_temp[0][jj];
2126                      num[1] += weight_buffer[jj] * rgba_temp[1][jj];
2127                      num[2] += weight_buffer[jj] * rgba_temp[2][jj];
2128                      num[3] += weight_buffer[jj] * rgba_temp[3][jj];
2129                   }
2130
2131                   buffer_next = 0;
2132                }
2133
2134                den += weight;
2135             }
2136             q += dq;
2137             dq += ddq;
2138          }
2139       }
2140
2141       /* if the tex coord buffer contains unread values, we will read
2142        * them now.
2143        */
2144       if (buffer_next > 0) {
2145          unsigned jj;
2146          /* it is assumed that samp->min_img_filter is set to
2147           * img_filter_2d_nearest or one of the
2148           * accelerated img_filter_2d_nearest_XXX functions.
2149           */
2150          for (jj = 0; jj < buffer_next; jj++) {
2151             args.s = s_buffer[jj];
2152             args.t = t_buffer[jj];
2153             args.p = p[jj];
2154             min_filter(sp_sview, sp_samp, &args, &rgba_temp[0][jj]);
2155             num[0] += weight_buffer[jj] * rgba_temp[0][jj];
2156             num[1] += weight_buffer[jj] * rgba_temp[1][jj];
2157             num[2] += weight_buffer[jj] * rgba_temp[2][jj];
2158             num[3] += weight_buffer[jj] * rgba_temp[3][jj];
2159          }
2160       }
2161
2162       if (den <= 0.0F) {
2163          /* Reaching this place would mean that no pixels intersected
2164           * the ellipse.  This should never happen because the filter
2165           * we use always intersects at least one pixel.
2166           */
2167
2168          /*rgba[0]=0;
2169          rgba[1]=0;
2170          rgba[2]=0;
2171          rgba[3]=0;*/
2172          /* not enough pixels in resampling, resort to direct interpolation */
2173          args.s = s[j];
2174          args.t = t[j];
2175          args.p = p[j];
2176          min_filter(sp_sview, sp_samp, &args, &rgba_temp[0][j]);
2177          den = 1;
2178          num[0] = rgba_temp[0][j];
2179          num[1] = rgba_temp[1][j];
2180          num[2] = rgba_temp[2][j];
2181          num[3] = rgba_temp[3][j];
2182       }
2183
2184       rgba[0][j] = num[0] / den;
2185       rgba[1][j] = num[1] / den;
2186       rgba[2][j] = num[2] / den;
2187       rgba[3][j] = num[3] / den;
2188    }
2189 }
2190
2191
2192 /**
2193  * Sample 2D texture using an anisotropic filter.
2194  */
2195 static void
2196 mip_filter_linear_aniso(struct sp_sampler_view *sp_sview,
2197                         struct sp_sampler *sp_samp,
2198                         img_filter_func min_filter,
2199                         img_filter_func mag_filter,
2200                         const float s[TGSI_QUAD_SIZE],
2201                         const float t[TGSI_QUAD_SIZE],
2202                         const float p[TGSI_QUAD_SIZE],
2203                         const float c0[TGSI_QUAD_SIZE],
2204                         const float lod_in[TGSI_QUAD_SIZE],
2205                         enum tgsi_sampler_control control,
2206                         float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2207 {
2208    const struct pipe_resource *texture = sp_sview->base.texture;
2209    const struct pipe_sampler_view *psview = &sp_sview->base;
2210    int level0;
2211    float lambda;
2212    float lod[TGSI_QUAD_SIZE];
2213
2214    float s_to_u = u_minify(texture->width0, psview->u.tex.first_level);
2215    float t_to_v = u_minify(texture->height0, psview->u.tex.first_level);
2216    float dudx = (s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]) * s_to_u;
2217    float dudy = (s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]) * s_to_u;
2218    float dvdx = (t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]) * t_to_v;
2219    float dvdy = (t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]) * t_to_v;
2220    struct img_filter_args args;
2221
2222    if (control == tgsi_sampler_lod_bias ||
2223        control == tgsi_sampler_lod_none ||
2224        /* XXX FIXME */
2225        control == tgsi_sampler_derivs_explicit) {
2226       /* note: instead of working with Px and Py, we will use the
2227        * squared length instead, to avoid sqrt.
2228        */
2229       float Px2 = dudx * dudx + dvdx * dvdx;
2230       float Py2 = dudy * dudy + dvdy * dvdy;
2231
2232       float Pmax2;
2233       float Pmin2;
2234       float e;
2235       const float maxEccentricity = sp_samp->base.max_anisotropy * sp_samp->base.max_anisotropy;
2236
2237       if (Px2 < Py2) {
2238          Pmax2 = Py2;
2239          Pmin2 = Px2;
2240       }
2241       else {
2242          Pmax2 = Px2;
2243          Pmin2 = Py2;
2244       }
2245
2246       /* if the eccentricity of the ellipse is too big, scale up the shorter
2247        * of the two vectors to limit the maximum amount of work per pixel
2248        */
2249       e = Pmax2 / Pmin2;
2250       if (e > maxEccentricity) {
2251          /* float s=e / maxEccentricity;
2252             minor[0] *= s;
2253             minor[1] *= s;
2254             Pmin2 *= s; */
2255          Pmin2 = Pmax2 / maxEccentricity;
2256       }
2257
2258       /* note: we need to have Pmin=sqrt(Pmin2) here, but we can avoid
2259        * this since 0.5*log(x) = log(sqrt(x))
2260        */
2261       lambda = 0.5F * util_fast_log2(Pmin2) + sp_samp->base.lod_bias;
2262       compute_lod(&sp_samp->base, control, lambda, lod_in, lod);
2263    }
2264    else {
2265       assert(control == tgsi_sampler_lod_explicit ||
2266              control == tgsi_sampler_lod_zero);
2267       compute_lod(&sp_samp->base, control, sp_samp->base.lod_bias, lod_in, lod);
2268    }
2269
2270    /* XXX: Take into account all lod values.
2271     */
2272    lambda = lod[0];
2273    level0 = psview->u.tex.first_level + (int)lambda;
2274
2275    /* If the ellipse covers the whole image, we can
2276     * simply return the average of the whole image.
2277     */
2278    if (level0 >= (int) psview->u.tex.last_level) {
2279       int j;
2280       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2281          args.s = s[j];
2282          args.t = t[j];
2283          args.p = p[j];
2284          args.level = psview->u.tex.last_level;
2285          args.face_id = sp_sview->faces[j];
2286          min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
2287       }
2288    }
2289    else {
2290       /* don't bother interpolating between multiple LODs; it doesn't
2291        * seem to be worth the extra running time.
2292        */
2293       img_filter_2d_ewa(sp_sview, sp_samp, min_filter, mag_filter,
2294                         s, t, p, level0,
2295                         dudx, dvdx, dudy, dvdy, rgba);
2296    }
2297
2298    if (DEBUG_TEX) {
2299       print_sample_4(__FUNCTION__, rgba);
2300    }
2301 }
2302
2303
2304 /**
2305  * Specialized version of mip_filter_linear with hard-wired calls to
2306  * 2d lambda calculation and 2d_linear_repeat_POT img filters.
2307  */
2308 static void
2309 mip_filter_linear_2d_linear_repeat_POT(
2310    struct sp_sampler_view *sp_sview,
2311    struct sp_sampler *sp_samp,
2312    img_filter_func min_filter,
2313    img_filter_func mag_filter,
2314    const float s[TGSI_QUAD_SIZE],
2315    const float t[TGSI_QUAD_SIZE],
2316    const float p[TGSI_QUAD_SIZE],
2317    const float c0[TGSI_QUAD_SIZE],
2318    const float lod_in[TGSI_QUAD_SIZE],
2319    enum tgsi_sampler_control control,
2320    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2321 {
2322    const struct pipe_sampler_view *psview = &sp_sview->base;
2323    int j;
2324    float lod[TGSI_QUAD_SIZE];
2325
2326    compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, control, lod);
2327
2328    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2329       int level0 = psview->u.tex.first_level + (int)lod[j];
2330       struct img_filter_args args;
2331       /* Catches both negative and large values of level0:
2332        */
2333       args.s = s[j];
2334       args.t = t[j];
2335       args.p = p[j];
2336       args.face_id = sp_sview->faces[j];
2337       if ((unsigned)level0 >= psview->u.tex.last_level) {
2338          if (level0 < 0)
2339             args.level = psview->u.tex.first_level;
2340          else
2341             args.level = psview->u.tex.last_level;
2342          img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, &args,
2343                                          &rgba[0][j]);
2344
2345       }
2346       else {
2347          float levelBlend = frac(lod[j]);
2348          float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2349          int c;
2350
2351          args.level = level0;
2352          img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, &args, &rgbax[0][0]);
2353          args.level = level0+1;
2354          img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, &args, &rgbax[0][1]);
2355
2356          for (c = 0; c < TGSI_NUM_CHANNELS; c++)
2357             rgba[c][j] = lerp(levelBlend, rgbax[c][0], rgbax[c][1]);
2358       }
2359    }
2360
2361    if (DEBUG_TEX) {
2362       print_sample_4(__FUNCTION__, rgba);
2363    }
2364 }
2365
2366
2367 /**
2368  * Do shadow/depth comparisons.
2369  */
2370 static void
2371 sample_compare(struct sp_sampler_view *sp_sview,
2372                struct sp_sampler *sp_samp,
2373                const float s[TGSI_QUAD_SIZE],
2374                const float t[TGSI_QUAD_SIZE],
2375                const float p[TGSI_QUAD_SIZE],
2376                const float c0[TGSI_QUAD_SIZE],
2377                const float c1[TGSI_QUAD_SIZE],
2378                enum tgsi_sampler_control control,
2379                float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2380 {
2381    const struct pipe_sampler_state *sampler = &sp_samp->base;
2382    int j;
2383    int k[4];
2384    float pc[4];
2385    const struct util_format_description *format_desc;
2386    unsigned chan_type;
2387
2388    /**
2389     * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
2390     * for 2D Array texture we need to use the 'c0' (aka Q).
2391     * When we sampled the depth texture, the depth value was put into all
2392     * RGBA channels.  We look at the red channel here.
2393     */
2394
2395    if (sp_sview->base.target == PIPE_TEXTURE_2D_ARRAY ||
2396        sp_sview->base.target == PIPE_TEXTURE_CUBE) {
2397       pc[0] = c0[0];
2398       pc[1] = c0[1];
2399       pc[2] = c0[2];
2400       pc[3] = c0[3];
2401    } else if (sp_sview->base.target == PIPE_TEXTURE_CUBE_ARRAY) {
2402       pc[0] = c1[0];
2403       pc[1] = c1[1];
2404       pc[2] = c1[2];
2405       pc[3] = c1[3];
2406    } else {
2407       pc[0] = p[0];
2408       pc[1] = p[1];
2409       pc[2] = p[2];
2410       pc[3] = p[3];
2411    }
2412
2413    format_desc = util_format_description(sp_sview->base.format);
2414    /* not entirely sure we couldn't end up with non-valid swizzle here */
2415    chan_type = format_desc->swizzle[0] <= UTIL_FORMAT_SWIZZLE_W ?
2416                   format_desc->channel[format_desc->swizzle[0]].type :
2417                   UTIL_FORMAT_TYPE_FLOAT;
2418    if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
2419       /*
2420        * clamping is a result of conversion to texture format, hence
2421        * doesn't happen with floats. Technically also should do comparison
2422        * in texture format (quantization!).
2423        */
2424       pc[0] = CLAMP(pc[0], 0.0F, 1.0F);
2425       pc[1] = CLAMP(pc[1], 0.0F, 1.0F);
2426       pc[2] = CLAMP(pc[2], 0.0F, 1.0F);
2427       pc[3] = CLAMP(pc[3], 0.0F, 1.0F);
2428    }
2429
2430    /* compare four texcoords vs. four texture samples */
2431    switch (sampler->compare_func) {
2432    case PIPE_FUNC_LESS:
2433       k[0] = pc[0] < rgba[0][0];
2434       k[1] = pc[1] < rgba[0][1];
2435       k[2] = pc[2] < rgba[0][2];
2436       k[3] = pc[3] < rgba[0][3];
2437       break;
2438    case PIPE_FUNC_LEQUAL:
2439       k[0] = pc[0] <= rgba[0][0];
2440       k[1] = pc[1] <= rgba[0][1];
2441       k[2] = pc[2] <= rgba[0][2];
2442       k[3] = pc[3] <= rgba[0][3];
2443       break;
2444    case PIPE_FUNC_GREATER:
2445       k[0] = pc[0] > rgba[0][0];
2446       k[1] = pc[1] > rgba[0][1];
2447       k[2] = pc[2] > rgba[0][2];
2448       k[3] = pc[3] > rgba[0][3];
2449       break;
2450    case PIPE_FUNC_GEQUAL:
2451       k[0] = pc[0] >= rgba[0][0];
2452       k[1] = pc[1] >= rgba[0][1];
2453       k[2] = pc[2] >= rgba[0][2];
2454       k[3] = pc[3] >= rgba[0][3];
2455       break;
2456    case PIPE_FUNC_EQUAL:
2457       k[0] = pc[0] == rgba[0][0];
2458       k[1] = pc[1] == rgba[0][1];
2459       k[2] = pc[2] == rgba[0][2];
2460       k[3] = pc[3] == rgba[0][3];
2461       break;
2462    case PIPE_FUNC_NOTEQUAL:
2463       k[0] = pc[0] != rgba[0][0];
2464       k[1] = pc[1] != rgba[0][1];
2465       k[2] = pc[2] != rgba[0][2];
2466       k[3] = pc[3] != rgba[0][3];
2467       break;
2468    case PIPE_FUNC_ALWAYS:
2469       k[0] = k[1] = k[2] = k[3] = 1;
2470       break;
2471    case PIPE_FUNC_NEVER:
2472       k[0] = k[1] = k[2] = k[3] = 0;
2473       break;
2474    default:
2475       k[0] = k[1] = k[2] = k[3] = 0;
2476       assert(0);
2477       break;
2478    }
2479
2480    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2481       rgba[0][j] = k[j];
2482       rgba[1][j] = k[j];
2483       rgba[2][j] = k[j];
2484       rgba[3][j] = 1.0F;
2485    }
2486 }
2487
2488
2489 static void
2490 do_swizzling(const struct pipe_sampler_view *sview,
2491              float in[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
2492              float out[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2493 {
2494    int j;
2495    const unsigned swizzle_r = sview->swizzle_r;
2496    const unsigned swizzle_g = sview->swizzle_g;
2497    const unsigned swizzle_b = sview->swizzle_b;
2498    const unsigned swizzle_a = sview->swizzle_a;
2499
2500    switch (swizzle_r) {
2501    case PIPE_SWIZZLE_ZERO:
2502       for (j = 0; j < 4; j++)
2503          out[0][j] = 0.0f;
2504       break;
2505    case PIPE_SWIZZLE_ONE:
2506       for (j = 0; j < 4; j++)
2507          out[0][j] = 1.0f;
2508       break;
2509    default:
2510       assert(swizzle_r < 4);
2511       for (j = 0; j < 4; j++)
2512          out[0][j] = in[swizzle_r][j];
2513    }
2514
2515    switch (swizzle_g) {
2516    case PIPE_SWIZZLE_ZERO:
2517       for (j = 0; j < 4; j++)
2518          out[1][j] = 0.0f;
2519       break;
2520    case PIPE_SWIZZLE_ONE:
2521       for (j = 0; j < 4; j++)
2522          out[1][j] = 1.0f;
2523       break;
2524    default:
2525       assert(swizzle_g < 4);
2526       for (j = 0; j < 4; j++)
2527          out[1][j] = in[swizzle_g][j];
2528    }
2529
2530    switch (swizzle_b) {
2531    case PIPE_SWIZZLE_ZERO:
2532       for (j = 0; j < 4; j++)
2533          out[2][j] = 0.0f;
2534       break;
2535    case PIPE_SWIZZLE_ONE:
2536       for (j = 0; j < 4; j++)
2537          out[2][j] = 1.0f;
2538       break;
2539    default:
2540       assert(swizzle_b < 4);
2541       for (j = 0; j < 4; j++)
2542          out[2][j] = in[swizzle_b][j];
2543    }
2544
2545    switch (swizzle_a) {
2546    case PIPE_SWIZZLE_ZERO:
2547       for (j = 0; j < 4; j++)
2548          out[3][j] = 0.0f;
2549       break;
2550    case PIPE_SWIZZLE_ONE:
2551       for (j = 0; j < 4; j++)
2552          out[3][j] = 1.0f;
2553       break;
2554    default:
2555       assert(swizzle_a < 4);
2556       for (j = 0; j < 4; j++)
2557          out[3][j] = in[swizzle_a][j];
2558    }
2559 }
2560
2561
2562 static wrap_nearest_func
2563 get_nearest_unorm_wrap(unsigned mode)
2564 {
2565    switch (mode) {
2566    case PIPE_TEX_WRAP_CLAMP:
2567       return wrap_nearest_unorm_clamp;
2568    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2569       return wrap_nearest_unorm_clamp_to_edge;
2570    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2571       return wrap_nearest_unorm_clamp_to_border;
2572    default:
2573       debug_printf("illegal wrap mode %d with non-normalized coords\n", mode);
2574       return wrap_nearest_unorm_clamp;
2575    }
2576 }
2577
2578
2579 static wrap_nearest_func
2580 get_nearest_wrap(unsigned mode)
2581 {
2582    switch (mode) {
2583    case PIPE_TEX_WRAP_REPEAT:
2584       return wrap_nearest_repeat;
2585    case PIPE_TEX_WRAP_CLAMP:
2586       return wrap_nearest_clamp;
2587    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2588       return wrap_nearest_clamp_to_edge;
2589    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2590       return wrap_nearest_clamp_to_border;
2591    case PIPE_TEX_WRAP_MIRROR_REPEAT:
2592       return wrap_nearest_mirror_repeat;
2593    case PIPE_TEX_WRAP_MIRROR_CLAMP:
2594       return wrap_nearest_mirror_clamp;
2595    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
2596       return wrap_nearest_mirror_clamp_to_edge;
2597    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
2598       return wrap_nearest_mirror_clamp_to_border;
2599    default:
2600       assert(0);
2601       return wrap_nearest_repeat;
2602    }
2603 }
2604
2605
2606 static wrap_linear_func
2607 get_linear_unorm_wrap(unsigned mode)
2608 {
2609    switch (mode) {
2610    case PIPE_TEX_WRAP_CLAMP:
2611       return wrap_linear_unorm_clamp;
2612    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2613       return wrap_linear_unorm_clamp_to_edge;
2614    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2615       return wrap_linear_unorm_clamp_to_border;
2616    default:
2617       debug_printf("illegal wrap mode %d with non-normalized coords\n", mode);
2618       return wrap_linear_unorm_clamp;
2619    }
2620 }
2621
2622
2623 static wrap_linear_func
2624 get_linear_wrap(unsigned mode)
2625 {
2626    switch (mode) {
2627    case PIPE_TEX_WRAP_REPEAT:
2628       return wrap_linear_repeat;
2629    case PIPE_TEX_WRAP_CLAMP:
2630       return wrap_linear_clamp;
2631    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2632       return wrap_linear_clamp_to_edge;
2633    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2634       return wrap_linear_clamp_to_border;
2635    case PIPE_TEX_WRAP_MIRROR_REPEAT:
2636       return wrap_linear_mirror_repeat;
2637    case PIPE_TEX_WRAP_MIRROR_CLAMP:
2638       return wrap_linear_mirror_clamp;
2639    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
2640       return wrap_linear_mirror_clamp_to_edge;
2641    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
2642       return wrap_linear_mirror_clamp_to_border;
2643    default:
2644       assert(0);
2645       return wrap_linear_repeat;
2646    }
2647 }
2648
2649
2650 /**
2651  * Is swizzling needed for the given state key?
2652  */
2653 static INLINE bool
2654 any_swizzle(const struct pipe_sampler_view *view)
2655 {
2656    return (view->swizzle_r != PIPE_SWIZZLE_RED ||
2657            view->swizzle_g != PIPE_SWIZZLE_GREEN ||
2658            view->swizzle_b != PIPE_SWIZZLE_BLUE ||
2659            view->swizzle_a != PIPE_SWIZZLE_ALPHA);
2660 }
2661
2662
2663 static img_filter_func
2664 get_img_filter(const struct sp_sampler_view *sp_sview,
2665                const struct pipe_sampler_state *sampler,
2666                unsigned filter)
2667 {
2668    switch (sp_sview->base.target) {
2669    case PIPE_BUFFER:
2670    case PIPE_TEXTURE_1D:
2671       if (filter == PIPE_TEX_FILTER_NEAREST)
2672          return img_filter_1d_nearest;
2673       else
2674          return img_filter_1d_linear;
2675       break;
2676    case PIPE_TEXTURE_1D_ARRAY:
2677       if (filter == PIPE_TEX_FILTER_NEAREST)
2678          return img_filter_1d_array_nearest;
2679       else
2680          return img_filter_1d_array_linear;
2681       break;
2682    case PIPE_TEXTURE_2D:
2683    case PIPE_TEXTURE_RECT:
2684       /* Try for fast path:
2685        */
2686       if (sp_sview->pot2d &&
2687           sampler->wrap_s == sampler->wrap_t &&
2688           sampler->normalized_coords)
2689       {
2690          switch (sampler->wrap_s) {
2691          case PIPE_TEX_WRAP_REPEAT:
2692             switch (filter) {
2693             case PIPE_TEX_FILTER_NEAREST:
2694                return img_filter_2d_nearest_repeat_POT;
2695             case PIPE_TEX_FILTER_LINEAR:
2696                return img_filter_2d_linear_repeat_POT;
2697             default:
2698                break;
2699             }
2700             break;
2701          case PIPE_TEX_WRAP_CLAMP:
2702             switch (filter) {
2703             case PIPE_TEX_FILTER_NEAREST:
2704                return img_filter_2d_nearest_clamp_POT;
2705             default:
2706                break;
2707             }
2708          }
2709       }
2710       /* Otherwise use default versions:
2711        */
2712       if (filter == PIPE_TEX_FILTER_NEAREST)
2713          return img_filter_2d_nearest;
2714       else
2715          return img_filter_2d_linear;
2716       break;
2717    case PIPE_TEXTURE_2D_ARRAY:
2718       if (filter == PIPE_TEX_FILTER_NEAREST)
2719          return img_filter_2d_array_nearest;
2720       else
2721          return img_filter_2d_array_linear;
2722       break;
2723    case PIPE_TEXTURE_CUBE:
2724       if (filter == PIPE_TEX_FILTER_NEAREST)
2725          return img_filter_cube_nearest;
2726       else
2727          return img_filter_cube_linear;
2728       break;
2729    case PIPE_TEXTURE_CUBE_ARRAY:
2730       if (filter == PIPE_TEX_FILTER_NEAREST)
2731          return img_filter_cube_array_nearest;
2732       else
2733          return img_filter_cube_array_linear;
2734       break;
2735    case PIPE_TEXTURE_3D:
2736       if (filter == PIPE_TEX_FILTER_NEAREST)
2737          return img_filter_3d_nearest;
2738       else
2739          return img_filter_3d_linear;
2740       break;
2741    default:
2742       assert(0);
2743       return img_filter_1d_nearest;
2744    }
2745 }
2746
2747
2748 static void
2749 sample_mip(struct sp_sampler_view *sp_sview,
2750            struct sp_sampler *sp_samp,
2751            const float s[TGSI_QUAD_SIZE],
2752            const float t[TGSI_QUAD_SIZE],
2753            const float p[TGSI_QUAD_SIZE],
2754            const float c0[TGSI_QUAD_SIZE],
2755            const float lod[TGSI_QUAD_SIZE],
2756            enum tgsi_sampler_control control,
2757            float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2758 {
2759    mip_filter_func mip_filter;
2760    img_filter_func min_img_filter = NULL;
2761    img_filter_func mag_img_filter = NULL;
2762
2763    if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) {
2764       mip_filter = mip_filter_linear_2d_linear_repeat_POT;
2765    }
2766    else {
2767       mip_filter = sp_samp->mip_filter;
2768       min_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->min_img_filter);
2769       if (sp_samp->min_mag_equal) {
2770          mag_img_filter = min_img_filter;
2771       }
2772       else {
2773          mag_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->base.mag_img_filter);
2774       }
2775    }
2776
2777    mip_filter(sp_sview, sp_samp, min_img_filter, mag_img_filter,
2778               s, t, p, c0, lod, control, rgba);
2779
2780    if (sp_samp->base.compare_mode != PIPE_TEX_COMPARE_NONE) {
2781       sample_compare(sp_sview, sp_samp, s, t, p, c0, lod, control, rgba);
2782    }
2783
2784    if (sp_sview->need_swizzle) {
2785       float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2786       memcpy(rgba_temp, rgba, sizeof(rgba_temp));
2787       do_swizzling(&sp_sview->base, rgba_temp, rgba);
2788    }
2789
2790 }
2791
2792
2793 /**
2794  * Use 3D texcoords to choose a cube face, then sample the 2D cube faces.
2795  * Put face info into the sampler faces[] array.
2796  */
2797 static void
2798 sample_cube(struct sp_sampler_view *sp_sview,
2799             struct sp_sampler *sp_samp,
2800             const float s[TGSI_QUAD_SIZE],
2801             const float t[TGSI_QUAD_SIZE],
2802             const float p[TGSI_QUAD_SIZE],
2803             const float c0[TGSI_QUAD_SIZE],
2804             const float c1[TGSI_QUAD_SIZE],
2805             enum tgsi_sampler_control control,
2806             float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2807 {
2808    unsigned j;
2809    float ssss[4], tttt[4];
2810
2811    /* Not actually used, but the intermediate steps that do the
2812     * dereferencing don't know it.
2813     */
2814    static float pppp[4] = { 0, 0, 0, 0 };
2815
2816    pppp[0] = c0[0];
2817    pppp[1] = c0[1];
2818    pppp[2] = c0[2];
2819    pppp[3] = c0[3];
2820    /*
2821      major axis
2822      direction    target                             sc     tc    ma
2823      ----------   -------------------------------    ---    ---   ---
2824      +rx          TEXTURE_CUBE_MAP_POSITIVE_X_EXT    -rz    -ry   rx
2825      -rx          TEXTURE_CUBE_MAP_NEGATIVE_X_EXT    +rz    -ry   rx
2826      +ry          TEXTURE_CUBE_MAP_POSITIVE_Y_EXT    +rx    +rz   ry
2827      -ry          TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT    +rx    -rz   ry
2828      +rz          TEXTURE_CUBE_MAP_POSITIVE_Z_EXT    +rx    -ry   rz
2829      -rz          TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT    -rx    -ry   rz
2830    */
2831
2832    /* Choose the cube face and compute new s/t coords for the 2D face.
2833     *
2834     * Use the same cube face for all four pixels in the quad.
2835     *
2836     * This isn't ideal, but if we want to use a different cube face
2837     * per pixel in the quad, we'd have to also compute the per-face
2838     * LOD here too.  That's because the four post-face-selection
2839     * texcoords are no longer related to each other (they're
2840     * per-face!)  so we can't use subtraction to compute the partial
2841     * deriviates to compute the LOD.  Doing so (near cube edges
2842     * anyway) gives us pretty much random values.
2843     */
2844    {
2845       /* use the average of the four pixel's texcoords to choose the face */
2846       const float rx = 0.25F * (s[0] + s[1] + s[2] + s[3]);
2847       const float ry = 0.25F * (t[0] + t[1] + t[2] + t[3]);
2848       const float rz = 0.25F * (p[0] + p[1] + p[2] + p[3]);
2849       const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
2850
2851       if (arx >= ary && arx >= arz) {
2852          float sign = (rx >= 0.0F) ? 1.0F : -1.0F;
2853          uint face = (rx >= 0.0F) ? PIPE_TEX_FACE_POS_X : PIPE_TEX_FACE_NEG_X;
2854          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2855             const float ima = -0.5F / fabsf(s[j]);
2856             ssss[j] = sign *  p[j] * ima + 0.5F;
2857             tttt[j] =         t[j] * ima + 0.5F;
2858             sp_sview->faces[j] = face;
2859          }
2860       }
2861       else if (ary >= arx && ary >= arz) {
2862          float sign = (ry >= 0.0F) ? 1.0F : -1.0F;
2863          uint face = (ry >= 0.0F) ? PIPE_TEX_FACE_POS_Y : PIPE_TEX_FACE_NEG_Y;
2864          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2865             const float ima = -0.5F / fabsf(t[j]);
2866             ssss[j] =        -s[j] * ima + 0.5F;
2867             tttt[j] = sign * -p[j] * ima + 0.5F;
2868             sp_sview->faces[j] = face;
2869          }
2870       }
2871       else {
2872          float sign = (rz >= 0.0F) ? 1.0F : -1.0F;
2873          uint face = (rz >= 0.0F) ? PIPE_TEX_FACE_POS_Z : PIPE_TEX_FACE_NEG_Z;
2874          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2875             const float ima = -0.5F / fabsf(p[j]);
2876             ssss[j] = sign * -s[j] * ima + 0.5F;
2877             tttt[j] =         t[j] * ima + 0.5F;
2878             sp_sview->faces[j] = face;
2879          }
2880       }
2881    }
2882
2883    sample_mip(sp_sview, sp_samp, ssss, tttt, pppp, c0, c1, control, rgba);
2884 }
2885
2886
2887 static void
2888 sp_get_dims(struct sp_sampler_view *sp_sview, int level,
2889             int dims[4])
2890 {
2891    const struct pipe_sampler_view *view = &sp_sview->base;
2892    const struct pipe_resource *texture = view->texture;
2893
2894    if (view->target == PIPE_BUFFER) {
2895       dims[0] = (view->u.buf.last_element - view->u.buf.first_element) + 1;
2896       /* the other values are undefined, but let's avoid potential valgrind
2897        * warnings.
2898        */
2899       dims[1] = dims[2] = dims[3] = 0;
2900       return;
2901    }
2902
2903    /* undefined according to EXT_gpu_program */
2904    level += view->u.tex.first_level;
2905    if (level > view->u.tex.last_level)
2906       return;
2907
2908    dims[3] = view->u.tex.last_level - view->u.tex.first_level + 1;
2909    dims[0] = u_minify(texture->width0, level);
2910
2911    switch (view->target) {
2912    case PIPE_TEXTURE_1D_ARRAY:
2913       dims[1] = view->u.tex.last_layer - view->u.tex.first_layer + 1;
2914       /* fallthrough */
2915    case PIPE_TEXTURE_1D:
2916       return;
2917    case PIPE_TEXTURE_2D_ARRAY:
2918       dims[2] = view->u.tex.last_layer - view->u.tex.first_layer + 1;
2919       /* fallthrough */
2920    case PIPE_TEXTURE_2D:
2921    case PIPE_TEXTURE_CUBE:
2922    case PIPE_TEXTURE_RECT:
2923       dims[1] = u_minify(texture->height0, level);
2924       return;
2925    case PIPE_TEXTURE_3D:
2926       dims[1] = u_minify(texture->height0, level);
2927       dims[2] = u_minify(texture->depth0, level);
2928       return;
2929    case PIPE_TEXTURE_CUBE_ARRAY:
2930       dims[1] = u_minify(texture->height0, level);
2931       dims[2] = (view->u.tex.last_layer - view->u.tex.first_layer + 1) / 6;
2932       break;
2933    default:
2934       assert(!"unexpected texture target in sp_get_dims()");
2935       return;
2936    }
2937 }
2938
2939 /**
2940  * This function is only used for getting unfiltered texels via the
2941  * TXF opcode.  The GL spec says that out-of-bounds texel fetches
2942  * produce undefined results.  Instead of crashing, lets just clamp
2943  * coords to the texture image size.
2944  */
2945 static void
2946 sp_get_texels(struct sp_sampler_view *sp_sview,
2947               const int v_i[TGSI_QUAD_SIZE],
2948               const int v_j[TGSI_QUAD_SIZE],
2949               const int v_k[TGSI_QUAD_SIZE],
2950               const int lod[TGSI_QUAD_SIZE],
2951               const int8_t offset[3],
2952               float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2953 {
2954    union tex_tile_address addr;
2955    const struct pipe_resource *texture = sp_sview->base.texture;
2956    int j, c;
2957    const float *tx;
2958    int width, height, depth;
2959
2960    addr.value = 0;
2961    /* TODO write a better test for LOD */
2962    addr.bits.level = sp_sview->base.target == PIPE_BUFFER ? 0 :
2963                         CLAMP(lod[0] + sp_sview->base.u.tex.first_level,
2964                               sp_sview->base.u.tex.first_level,
2965                               sp_sview->base.u.tex.last_level);
2966
2967    width = u_minify(texture->width0, addr.bits.level);
2968    height = u_minify(texture->height0, addr.bits.level);
2969    depth = u_minify(texture->depth0, addr.bits.level);
2970
2971    switch (sp_sview->base.target) {
2972    case PIPE_BUFFER:
2973    case PIPE_TEXTURE_1D:
2974       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2975          int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
2976          tx = get_texel_2d_no_border(sp_sview, addr, x, 0);
2977          for (c = 0; c < 4; c++) {
2978             rgba[c][j] = tx[c];
2979          }
2980       }
2981       break;
2982    case PIPE_TEXTURE_1D_ARRAY:
2983       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2984          int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
2985          int y = CLAMP(v_j[j], sp_sview->base.u.tex.first_layer,
2986                        sp_sview->base.u.tex.last_layer);
2987          tx = get_texel_2d_no_border(sp_sview, addr, x, y);
2988          for (c = 0; c < 4; c++) {
2989             rgba[c][j] = tx[c];
2990          }
2991       }
2992       break;
2993    case PIPE_TEXTURE_2D:
2994    case PIPE_TEXTURE_RECT:
2995       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2996          int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
2997          int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
2998          tx = get_texel_2d_no_border(sp_sview, addr, x, y);
2999          for (c = 0; c < 4; c++) {
3000             rgba[c][j] = tx[c];
3001          }
3002       }
3003       break;
3004    case PIPE_TEXTURE_2D_ARRAY:
3005       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3006          int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
3007          int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
3008          int layer = CLAMP(v_k[j], sp_sview->base.u.tex.first_layer,
3009                            sp_sview->base.u.tex.last_layer);
3010          tx = get_texel_3d_no_border(sp_sview, addr, x, y, layer);
3011          for (c = 0; c < 4; c++) {
3012             rgba[c][j] = tx[c];
3013          }
3014       }
3015       break;
3016    case PIPE_TEXTURE_3D:
3017       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3018          int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
3019          int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
3020          int z = CLAMP(v_k[j] + offset[2], 0, depth - 1);
3021          tx = get_texel_3d_no_border(sp_sview, addr, x, y, z);
3022          for (c = 0; c < 4; c++) {
3023             rgba[c][j] = tx[c];
3024          }
3025       }
3026       break;
3027    case PIPE_TEXTURE_CUBE: /* TXF can't work on CUBE according to spec */
3028    default:
3029       assert(!"Unknown or CUBE texture type in TXF processing\n");
3030       break;
3031    }
3032
3033    if (sp_sview->need_swizzle) {
3034       float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
3035       memcpy(rgba_temp, rgba, sizeof(rgba_temp));
3036       do_swizzling(&sp_sview->base, rgba_temp, rgba);
3037    }
3038 }
3039
3040
3041 void *
3042 softpipe_create_sampler_state(struct pipe_context *pipe,
3043                               const struct pipe_sampler_state *sampler)
3044 {
3045    struct sp_sampler *samp = CALLOC_STRUCT(sp_sampler);
3046
3047    samp->base = *sampler;
3048
3049    /* Note that (for instance) linear_texcoord_s and
3050     * nearest_texcoord_s may be active at the same time, if the
3051     * sampler min_img_filter differs from its mag_img_filter.
3052     */
3053    if (sampler->normalized_coords) {
3054       samp->linear_texcoord_s = get_linear_wrap( sampler->wrap_s );
3055       samp->linear_texcoord_t = get_linear_wrap( sampler->wrap_t );
3056       samp->linear_texcoord_p = get_linear_wrap( sampler->wrap_r );
3057
3058       samp->nearest_texcoord_s = get_nearest_wrap( sampler->wrap_s );
3059       samp->nearest_texcoord_t = get_nearest_wrap( sampler->wrap_t );
3060       samp->nearest_texcoord_p = get_nearest_wrap( sampler->wrap_r );
3061    }
3062    else {
3063       samp->linear_texcoord_s = get_linear_unorm_wrap( sampler->wrap_s );
3064       samp->linear_texcoord_t = get_linear_unorm_wrap( sampler->wrap_t );
3065       samp->linear_texcoord_p = get_linear_unorm_wrap( sampler->wrap_r );
3066
3067       samp->nearest_texcoord_s = get_nearest_unorm_wrap( sampler->wrap_s );
3068       samp->nearest_texcoord_t = get_nearest_unorm_wrap( sampler->wrap_t );
3069       samp->nearest_texcoord_p = get_nearest_unorm_wrap( sampler->wrap_r );
3070    }
3071
3072    samp->min_img_filter = sampler->min_img_filter;
3073
3074    switch (sampler->min_mip_filter) {
3075    case PIPE_TEX_MIPFILTER_NONE:
3076       if (sampler->min_img_filter == sampler->mag_img_filter)
3077          samp->mip_filter = mip_filter_none_no_filter_select;
3078       else
3079          samp->mip_filter = mip_filter_none;
3080       break;
3081
3082    case PIPE_TEX_MIPFILTER_NEAREST:
3083       samp->mip_filter = mip_filter_nearest;
3084       break;
3085
3086    case PIPE_TEX_MIPFILTER_LINEAR:
3087       if (sampler->min_img_filter == sampler->mag_img_filter &&
3088           sampler->normalized_coords &&
3089           sampler->wrap_s == PIPE_TEX_WRAP_REPEAT &&
3090           sampler->wrap_t == PIPE_TEX_WRAP_REPEAT &&
3091           sampler->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
3092           sampler->max_anisotropy <= 1) {
3093          samp->min_mag_equal_repeat_linear = TRUE;
3094       }
3095       samp->mip_filter = mip_filter_linear;
3096
3097       /* Anisotropic filtering extension. */
3098       if (sampler->max_anisotropy > 1) {
3099          samp->mip_filter = mip_filter_linear_aniso;
3100
3101          /* Override min_img_filter:
3102           * min_img_filter needs to be set to NEAREST since we need to access
3103           * each texture pixel as it is and weight it later; using linear
3104           * filters will have incorrect results.
3105           * By setting the filter to NEAREST here, we can avoid calling the
3106           * generic img_filter_2d_nearest in the anisotropic filter function,
3107           * making it possible to use one of the accelerated implementations
3108           */
3109          samp->min_img_filter = PIPE_TEX_FILTER_NEAREST;
3110
3111          /* on first access create the lookup table containing the filter weights. */
3112         if (!weightLut) {
3113            create_filter_table();
3114         }
3115       }
3116       break;
3117    }
3118    if (samp->min_img_filter == sampler->mag_img_filter) {
3119       samp->min_mag_equal = TRUE;
3120    }
3121
3122    return (void *)samp;
3123 }
3124
3125
3126 compute_lambda_func
3127 softpipe_get_lambda_func(const struct pipe_sampler_view *view, unsigned shader)
3128 {
3129    if (shader != PIPE_SHADER_FRAGMENT)
3130       return compute_lambda_vert;
3131
3132    switch (view->target) {
3133    case PIPE_BUFFER:
3134    case PIPE_TEXTURE_1D:
3135    case PIPE_TEXTURE_1D_ARRAY:
3136       return compute_lambda_1d;
3137    case PIPE_TEXTURE_2D:
3138    case PIPE_TEXTURE_2D_ARRAY:
3139    case PIPE_TEXTURE_RECT:
3140    case PIPE_TEXTURE_CUBE:
3141    case PIPE_TEXTURE_CUBE_ARRAY:
3142       return compute_lambda_2d;
3143    case PIPE_TEXTURE_3D:
3144       return compute_lambda_3d;
3145    default:
3146       assert(0);
3147       return compute_lambda_1d;
3148    }
3149 }
3150
3151
3152 struct pipe_sampler_view *
3153 softpipe_create_sampler_view(struct pipe_context *pipe,
3154                              struct pipe_resource *resource,
3155                              const struct pipe_sampler_view *templ)
3156 {
3157    struct sp_sampler_view *sview = CALLOC_STRUCT(sp_sampler_view);
3158    struct softpipe_resource *spr = (struct softpipe_resource *)resource;
3159
3160    if (sview) {
3161       struct pipe_sampler_view *view = &sview->base;
3162       *view = *templ;
3163       view->reference.count = 1;
3164       view->texture = NULL;
3165       pipe_resource_reference(&view->texture, resource);
3166       view->context = pipe;
3167
3168 #ifdef DEBUG
3169      /*
3170       * This is possibly too lenient, but the primary reason is just
3171       * to catch state trackers which forget to initialize this, so
3172       * it only catches clearly impossible view targets.
3173       */
3174       if (view->target != resource->target) {
3175          if (view->target == PIPE_TEXTURE_1D)
3176             assert(resource->target == PIPE_TEXTURE_1D_ARRAY);
3177          else if (view->target == PIPE_TEXTURE_1D_ARRAY)
3178             assert(resource->target == PIPE_TEXTURE_1D);
3179          else if (view->target == PIPE_TEXTURE_2D)
3180             assert(resource->target == PIPE_TEXTURE_2D_ARRAY ||
3181                    resource->target == PIPE_TEXTURE_CUBE ||
3182                    resource->target == PIPE_TEXTURE_CUBE_ARRAY);
3183          else if (view->target == PIPE_TEXTURE_2D_ARRAY)
3184             assert(resource->target == PIPE_TEXTURE_2D ||
3185                    resource->target == PIPE_TEXTURE_CUBE ||
3186                    resource->target == PIPE_TEXTURE_CUBE_ARRAY);
3187          else if (view->target == PIPE_TEXTURE_CUBE)
3188             assert(resource->target == PIPE_TEXTURE_CUBE_ARRAY ||
3189                    resource->target == PIPE_TEXTURE_2D_ARRAY);
3190          else if (view->target == PIPE_TEXTURE_CUBE_ARRAY)
3191             assert(resource->target == PIPE_TEXTURE_CUBE ||
3192                    resource->target == PIPE_TEXTURE_2D_ARRAY);
3193          else
3194             assert(0);
3195       }
3196 #endif
3197
3198       if (any_swizzle(view)) {
3199          sview->need_swizzle = TRUE;
3200       }
3201
3202       if (view->target == PIPE_TEXTURE_CUBE ||
3203           view->target == PIPE_TEXTURE_CUBE_ARRAY)
3204          sview->get_samples = sample_cube;
3205       else {
3206          sview->get_samples = sample_mip;
3207       }
3208       sview->pot2d = spr->pot &&
3209                      (view->target == PIPE_TEXTURE_2D ||
3210                       view->target == PIPE_TEXTURE_RECT);
3211
3212       sview->xpot = util_logbase2( resource->width0 );
3213       sview->ypot = util_logbase2( resource->height0 );
3214    }
3215
3216    return (struct pipe_sampler_view *) sview;
3217 }
3218
3219
3220 static void
3221 sp_tgsi_get_dims(struct tgsi_sampler *tgsi_sampler,
3222                  const unsigned sview_index,
3223                  int level, int dims[4])
3224 {
3225    struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
3226
3227    assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
3228    /* always have a view here but texture is NULL if no sampler view was set. */
3229    if (!sp_samp->sp_sview[sview_index].base.texture) {
3230       dims[0] = dims[1] = dims[2] = dims[3] = 0;
3231       return;
3232    }
3233    sp_get_dims(&sp_samp->sp_sview[sview_index], level, dims);
3234 }
3235
3236
3237 static void
3238 sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler,
3239                     const unsigned sview_index,
3240                     const unsigned sampler_index,
3241                     const float s[TGSI_QUAD_SIZE],
3242                     const float t[TGSI_QUAD_SIZE],
3243                     const float p[TGSI_QUAD_SIZE],
3244                     const float c0[TGSI_QUAD_SIZE],
3245                     const float lod[TGSI_QUAD_SIZE],
3246                     float derivs[3][2][TGSI_QUAD_SIZE],
3247                     const int8_t offset[3],
3248                     enum tgsi_sampler_control control,
3249                     float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
3250 {
3251    struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
3252
3253    assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
3254    assert(sampler_index < PIPE_MAX_SAMPLERS);
3255    assert(sp_samp->sp_sampler[sampler_index]);
3256    /* always have a view here but texture is NULL if no sampler view was set. */
3257    if (!sp_samp->sp_sview[sview_index].base.texture) {
3258       int i, j;
3259       for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
3260          for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3261             rgba[j][i] = 0.0f;
3262          }
3263       }
3264       return;
3265    }
3266    sp_samp->sp_sview[sview_index].get_samples(&sp_samp->sp_sview[sview_index],
3267                                               sp_samp->sp_sampler[sampler_index],
3268                                               s, t, p, c0, lod, control, rgba);
3269 }
3270
3271
3272 static void
3273 sp_tgsi_get_texel(struct tgsi_sampler *tgsi_sampler,
3274                   const unsigned sview_index,
3275                   const int i[TGSI_QUAD_SIZE],
3276                   const int j[TGSI_QUAD_SIZE], const int k[TGSI_QUAD_SIZE],
3277                   const int lod[TGSI_QUAD_SIZE], const int8_t offset[3],
3278                   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
3279 {
3280    struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
3281
3282    assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
3283    /* always have a view here but texture is NULL if no sampler view was set. */
3284    if (!sp_samp->sp_sview[sview_index].base.texture) {
3285       int i, j;
3286       for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
3287          for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3288             rgba[j][i] = 0.0f;
3289          }
3290       }
3291       return;
3292    }
3293    sp_get_texels(&sp_samp->sp_sview[sview_index], i, j, k, lod, offset, rgba);
3294 }
3295
3296
3297 struct sp_tgsi_sampler *
3298 sp_create_tgsi_sampler(void)
3299 {
3300    struct sp_tgsi_sampler *samp = CALLOC_STRUCT(sp_tgsi_sampler);
3301    if (!samp)
3302       return NULL;
3303
3304    samp->base.get_dims = sp_tgsi_get_dims;
3305    samp->base.get_samples = sp_tgsi_get_samples;
3306    samp->base.get_texel = sp_tgsi_get_texel;
3307
3308    return samp;
3309 }
3310