src/gallium/drivers/softpipe/sp_tex_sample.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 VMware, Inc.
   4  * All Rights Reserved.
   5  * Copyright 2008-2010 VMware, Inc.  All rights reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the
   9  * "Software"), to deal in the Software without restriction, including
  10  * without limitation the rights to use, copy, modify, merge, publish,
  11  * distribute, sub license, and/or sell copies of the Software, and to
  12  * permit persons to whom the Software is furnished to do so, subject to
  13  * the following conditions:
  14  *
  15  * The above copyright notice and this permission notice (including the
  16  * next paragraph) shall be included in all copies or substantial portions
  17  * of the Software.
  18  *
  19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  22  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26  *
  27  **************************************************************************/
  28
  29 /**
  30  * Texture sampling
  31  *
  32  * Authors:
  33  *   Brian Paul
  34  *   Keith Whitwell
  35  */
  36
  37 #include "pipe/p_context.h"
  38 #include "pipe/p_defines.h"
  39 #include "pipe/p_shader_tokens.h"
  40 #include "util/u_math.h"
  41 #include "util/u_format.h"
  42 #include "util/u_memory.h"
  43 #include "util/u_inlines.h"
  44 #include "sp_quad.h"   /* only for #define QUAD_* tokens */
  45 #include "sp_tex_sample.h"
  46 #include "sp_texture.h"
  47 #include "sp_tex_tile_cache.h"
  48
  49
  50 /** Set to one to help debug texture sampling */
  51 #define DEBUG_TEX 0
  52
  53
  54 /*
  55  * Return fractional part of 'f'.  Used for computing interpolation weights.
  56  * Need to be careful with negative values.
  57  * Note, if this function isn't perfect you'll sometimes see 1-pixel bands
  58  * of improperly weighted linear-filtered textures.
  59  * The tests/texwrap.c demo is a good test.
  60  */
  61 static INLINE float
  62 frac(float f)
  63 {
  64    return f - floorf(f);
  65 }
  66
  67
  68
  69 /**
  70  * Linear interpolation macro
  71  */
  72 static INLINE float
  73 lerp(float a, float v0, float v1)
  74 {
  75    return v0 + a * (v1 - v0);
  76 }
  77
  78
  79 /**
  80  * Do 2D/bilinear interpolation of float values.
  81  * v00, v10, v01 and v11 are typically four texture samples in a square/box.
  82  * a and b are the horizontal and vertical interpolants.
  83  * It's important that this function is inlined when compiled with
  84  * optimization!  If we find that's not true on some systems, convert
  85  * to a macro.
  86  */
  87 static INLINE float
  88 lerp_2d(float a, float b,
  89         float v00, float v10, float v01, float v11)
  90 {
  91    const float temp0 = lerp(a, v00, v10);
  92    const float temp1 = lerp(a, v01, v11);
  93    return lerp(b, temp0, temp1);
  94 }
  95
  96
  97 /**
  98  * As above, but 3D interpolation of 8 values.
  99  */
 100 static INLINE float
 101 lerp_3d(float a, float b, float c,
 102         float v000, float v100, float v010, float v110,
 103         float v001, float v101, float v011, float v111)
 104 {
 105    const float temp0 = lerp_2d(a, b, v000, v100, v010, v110);
 106    const float temp1 = lerp_2d(a, b, v001, v101, v011, v111);
 107    return lerp(c, temp0, temp1);
 108 }
 109
 110
 111
 112 /**
 113  * Compute coord % size for repeat wrap modes.
 114  * Note that if coord is negative, coord % size doesn't give the right
 115  * value.  To avoid that problem we add a large multiple of the size
 116  * (rather than using a conditional).
 117  */
 118 static INLINE int
 119 repeat(int coord, unsigned size)
 120 {
 121    return (coord + size * 1024) % size;
 122 }
 123
 124
 125 /**
 126  * Apply texture coord wrapping mode and return integer texture indexes
 127  * for a vector of four texcoords (S or T or P).
 128  * \param wrapMode  PIPE_TEX_WRAP_x
 129  * \param s  the incoming texcoords
 130  * \param size  the texture image size
 131  * \param icoord  returns the integer texcoords
 132  */
 133 static void
 134 wrap_nearest_repeat(float s, unsigned size, int *icoord)
 135 {
 136    /* s limited to [0,1) */
 137    /* i limited to [0,size-1] */
 138    int i = util_ifloor(s * size);
 139    *icoord = repeat(i, size);
 140 }
 141
 142
 143 static void
 144 wrap_nearest_clamp(float s, unsigned size, int *icoord)
 145 {
 146    /* s limited to [0,1] */
 147    /* i limited to [0,size-1] */
 148    if (s <= 0.0F)
 149       *icoord = 0;
 150    else if (s >= 1.0F)
 151       *icoord = size - 1;
 152    else
 153       *icoord = util_ifloor(s * size);
 154 }
 155
 156
 157 static void
 158 wrap_nearest_clamp_to_edge(float s, unsigned size, int *icoord)
 159 {
 160    /* s limited to [min,max] */
 161    /* i limited to [0, size-1] */
 162    const float min = 1.0F / (2.0F * size);
 163    const float max = 1.0F - min;
 164    if (s < min)
 165       *icoord = 0;
 166    else if (s > max)
 167       *icoord = size - 1;
 168    else
 169       *icoord = util_ifloor(s * size);
 170 }
 171
 172
 173 static void
 174 wrap_nearest_clamp_to_border(float s, unsigned size, int *icoord)
 175 {
 176    /* s limited to [min,max] */
 177    /* i limited to [-1, size] */
 178    const float min = -1.0F / (2.0F * size);
 179    const float max = 1.0F - min;
 180    if (s <= min)
 181       *icoord = -1;
 182    else if (s >= max)
 183       *icoord = size;
 184    else
 185       *icoord = util_ifloor(s * size);
 186 }
 187
 188
 189 static void
 190 wrap_nearest_mirror_repeat(float s, unsigned size, int *icoord)
 191 {
 192    const float min = 1.0F / (2.0F * size);
 193    const float max = 1.0F - min;
 194    const int flr = util_ifloor(s);
 195    float u = frac(s);
 196    if (flr & 1)
 197       u = 1.0F - u;
 198    if (u < min)
 199       *icoord = 0;
 200    else if (u > max)
 201       *icoord = size - 1;
 202    else
 203       *icoord = util_ifloor(u * size);
 204 }
 205
 206
 207 static void
 208 wrap_nearest_mirror_clamp(float s, unsigned size, int *icoord)
 209 {
 210    /* s limited to [0,1] */
 211    /* i limited to [0,size-1] */
 212    const float u = fabsf(s);
 213    if (u <= 0.0F)
 214       *icoord = 0;
 215    else if (u >= 1.0F)
 216       *icoord = size - 1;
 217    else
 218       *icoord = util_ifloor(u * size);
 219 }
 220
 221
 222 static void
 223 wrap_nearest_mirror_clamp_to_edge(float s, unsigned size, int *icoord)
 224 {
 225    /* s limited to [min,max] */
 226    /* i limited to [0, size-1] */
 227    const float min = 1.0F / (2.0F * size);
 228    const float max = 1.0F - min;
 229    const float u = fabsf(s);
 230    if (u < min)
 231       *icoord = 0;
 232    else if (u > max)
 233       *icoord = size - 1;
 234    else
 235       *icoord = util_ifloor(u * size);
 236 }
 237
 238
 239 static void
 240 wrap_nearest_mirror_clamp_to_border(float s, unsigned size, int *icoord)
 241 {
 242    /* s limited to [min,max] */
 243    /* i limited to [0, size-1] */
 244    const float min = -1.0F / (2.0F * size);
 245    const float max = 1.0F - min;
 246    const float u = fabsf(s);
 247    if (u < min)
 248       *icoord = -1;
 249    else if (u > max)
 250       *icoord = size;
 251    else
 252       *icoord = util_ifloor(u * size);
 253 }
 254
 255
 256 /**
 257  * Used to compute texel locations for linear sampling
 258  * \param wrapMode  PIPE_TEX_WRAP_x
 259  * \param s  the texcoord
 260  * \param size  the texture image size
 261  * \param icoord0  returns first texture index
 262  * \param icoord1  returns second texture index (usually icoord0 + 1)
 263  * \param w  returns blend factor/weight between texture indices
 264  * \param icoord  returns the computed integer texture coord
 265  */
 266 static void
 267 wrap_linear_repeat(float s, unsigned size,
 268                    int *icoord0, int *icoord1, float *w)
 269 {
 270    float u = s * size - 0.5F;
 271    *icoord0 = repeat(util_ifloor(u), size);
 272    *icoord1 = repeat(*icoord0 + 1, size);
 273    *w = frac(u);
 274 }
 275
 276
 277 static void
 278 wrap_linear_clamp(float s, unsigned size,
 279                   int *icoord0, int *icoord1, float *w)
 280 {
 281    float u = CLAMP(s, 0.0F, 1.0F);
 282    u = u * size - 0.5f;
 283    *icoord0 = util_ifloor(u);
 284    *icoord1 = *icoord0 + 1;
 285    *w = frac(u);
 286 }
 287
 288
 289 static void
 290 wrap_linear_clamp_to_edge(float s, unsigned size,
 291                           int *icoord0, int *icoord1, float *w)
 292 {
 293    float u = CLAMP(s, 0.0F, 1.0F);
 294    u = u * size - 0.5f;
 295    *icoord0 = util_ifloor(u);
 296    *icoord1 = *icoord0 + 1;
 297    if (*icoord0 < 0)
 298       *icoord0 = 0;
 299    if (*icoord1 >= (int) size)
 300       *icoord1 = size - 1;
 301    *w = frac(u);
 302 }
 303
 304
 305 static void
 306 wrap_linear_clamp_to_border(float s, unsigned size,
 307                             int *icoord0, int *icoord1, float *w)
 308 {
 309    const float min = -1.0F / (2.0F * size);
 310    const float max = 1.0F - min;
 311    float u = CLAMP(s, min, max);
 312    u = u * size - 0.5f;
 313    *icoord0 = util_ifloor(u);
 314    *icoord1 = *icoord0 + 1;
 315    *w = frac(u);
 316 }
 317
 318
 319 static void
 320 wrap_linear_mirror_repeat(float s, unsigned size,
 321                           int *icoord0, int *icoord1, float *w)
 322 {
 323    const int flr = util_ifloor(s);
 324    float u = frac(s);
 325    if (flr & 1)
 326       u = 1.0F - u;
 327    u = u * size - 0.5F;
 328    *icoord0 = util_ifloor(u);
 329    *icoord1 = *icoord0 + 1;
 330    if (*icoord0 < 0)
 331       *icoord0 = 0;
 332    if (*icoord1 >= (int) size)
 333       *icoord1 = size - 1;
 334    *w = frac(u);
 335 }
 336
 337
 338 static void
 339 wrap_linear_mirror_clamp(float s, unsigned size,
 340                          int *icoord0, int *icoord1, float *w)
 341 {
 342    float u = fabsf(s);
 343    if (u >= 1.0F)
 344       u = (float) size;
 345    else
 346       u *= size;
 347    u -= 0.5F;
 348    *icoord0 = util_ifloor(u);
 349    *icoord1 = *icoord0 + 1;
 350    *w = frac(u);
 351 }
 352
 353
 354 static void
 355 wrap_linear_mirror_clamp_to_edge(float s, unsigned size,
 356                                  int *icoord0, int *icoord1, float *w)
 357 {
 358    float u = fabsf(s);
 359    if (u >= 1.0F)
 360       u = (float) size;
 361    else
 362       u *= size;
 363    u -= 0.5F;
 364    *icoord0 = util_ifloor(u);
 365    *icoord1 = *icoord0 + 1;
 366    if (*icoord0 < 0)
 367       *icoord0 = 0;
 368    if (*icoord1 >= (int) size)
 369       *icoord1 = size - 1;
 370    *w = frac(u);
 371 }
 372
 373
 374 static void
 375 wrap_linear_mirror_clamp_to_border(float s, unsigned size,
 376                                    int *icoord0, int *icoord1, float *w)
 377 {
 378    const float min = -1.0F / (2.0F * size);
 379    const float max = 1.0F - min;
 380    float u = fabsf(s);
 381    if (u <= min)
 382       u = min * size;
 383    else if (u >= max)
 384       u = max * size;
 385    else
 386       u *= size;
 387    u -= 0.5F;
 388    *icoord0 = util_ifloor(u);
 389    *icoord1 = *icoord0 + 1;
 390    *w = frac(u);
 391 }
 392
 393
 394 /**
 395  * PIPE_TEX_WRAP_CLAMP for nearest sampling, unnormalized coords.
 396  */
 397 static void
 398 wrap_nearest_unorm_clamp(float s, unsigned size, int *icoord)
 399 {
 400    int i = util_ifloor(s);
 401    *icoord = CLAMP(i, 0, (int) size-1);
 402 }
 403
 404
 405 /**
 406  * PIPE_TEX_WRAP_CLAMP_TO_BORDER for nearest sampling, unnormalized coords.
 407  */
 408 static void
 409 wrap_nearest_unorm_clamp_to_border(float s, unsigned size, int *icoord)
 410 {
 411    *icoord = util_ifloor( CLAMP(s, -0.5F, (float) size + 0.5F) );
 412 }
 413
 414
 415 /**
 416  * PIPE_TEX_WRAP_CLAMP_TO_EDGE for nearest sampling, unnormalized coords.
 417  */
 418 static void
 419 wrap_nearest_unorm_clamp_to_edge(float s, unsigned size, int *icoord)
 420 {
 421    *icoord = util_ifloor( CLAMP(s, 0.5F, (float) size - 0.5F) );
 422 }
 423
 424
 425 /**
 426  * PIPE_TEX_WRAP_CLAMP for linear sampling, unnormalized coords.
 427  */
 428 static void
 429 wrap_linear_unorm_clamp(float s, unsigned size,
 430                         int *icoord0, int *icoord1, float *w)
 431 {
 432    /* Not exactly what the spec says, but it matches NVIDIA output */
 433    float u = CLAMP(s - 0.5F, 0.0f, (float) size - 1.0f);
 434    *icoord0 = util_ifloor(u);
 435    *icoord1 = *icoord0 + 1;
 436    *w = frac(u);
 437 }
 438
 439
 440 /**
 441  * PIPE_TEX_WRAP_CLAMP_TO_BORDER for linear sampling, unnormalized coords.
 442  */
 443 static void
 444 wrap_linear_unorm_clamp_to_border(float s, unsigned size,
 445                                   int *icoord0, int *icoord1, float *w)
 446 {
 447    float u = CLAMP(s, -0.5F, (float) size + 0.5F);
 448    u -= 0.5F;
 449    *icoord0 = util_ifloor(u);
 450    *icoord1 = *icoord0 + 1;
 451    if (*icoord1 > (int) size - 1)
 452       *icoord1 = size - 1;
 453    *w = frac(u);
 454 }
 455
 456
 457 /**
 458  * PIPE_TEX_WRAP_CLAMP_TO_EDGE for linear sampling, unnormalized coords.
 459  */
 460 static void
 461 wrap_linear_unorm_clamp_to_edge(float s, unsigned size,
 462                                 int *icoord0, int *icoord1, float *w)
 463 {
 464    float u = CLAMP(s, +0.5F, (float) size - 0.5F);
 465    u -= 0.5F;
 466    *icoord0 = util_ifloor(u);
 467    *icoord1 = *icoord0 + 1;
 468    if (*icoord1 > (int) size - 1)
 469       *icoord1 = size - 1;
 470    *w = frac(u);
 471 }
 472
 473
 474 /**
 475  * Do coordinate to array index conversion.  For array textures.
 476  */
 477 static INLINE void
 478 wrap_array_layer(float coord, unsigned size, int *layer)
 479 {
 480    int c = util_ifloor(coord + 0.5F);
 481    *layer = CLAMP(c, 0, (int) size - 1);
 482 }
 483
 484
 485 /**
 486  * Examine the quad's texture coordinates to compute the partial
 487  * derivatives w.r.t X and Y, then compute lambda (level of detail).
 488  */
 489 static float
 490 compute_lambda_1d(const struct sp_sampler_view *sview,
 491                   const float s[TGSI_QUAD_SIZE],
 492                   const float t[TGSI_QUAD_SIZE],
 493                   const float p[TGSI_QUAD_SIZE])
 494 {
 495    const struct pipe_resource *texture = sview->base.texture;
 496    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
 497    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
 498    float rho = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
 499
 500    return util_fast_log2(rho);
 501 }
 502
 503
 504 static float
 505 compute_lambda_2d(const struct sp_sampler_view *sview,
 506                   const float s[TGSI_QUAD_SIZE],
 507                   const float t[TGSI_QUAD_SIZE],
 508                   const float p[TGSI_QUAD_SIZE])
 509 {
 510    const struct pipe_resource *texture = sview->base.texture;
 511    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
 512    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
 513    float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
 514    float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
 515    float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
 516    float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, sview->base.u.tex.first_level);
 517    float rho  = MAX2(maxx, maxy);
 518
 519    return util_fast_log2(rho);
 520 }
 521
 522
 523 static float
 524 compute_lambda_3d(const struct sp_sampler_view *sview,
 525                   const float s[TGSI_QUAD_SIZE],
 526                   const float t[TGSI_QUAD_SIZE],
 527                   const float p[TGSI_QUAD_SIZE])
 528 {
 529    const struct pipe_resource *texture = sview->base.texture;
 530    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
 531    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
 532    float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
 533    float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
 534    float dpdx = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]);
 535    float dpdy = fabsf(p[QUAD_TOP_LEFT]     - p[QUAD_BOTTOM_LEFT]);
 536    float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
 537    float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, sview->base.u.tex.first_level);
 538    float maxz = MAX2(dpdx, dpdy) * u_minify(texture->depth0, sview->base.u.tex.first_level);
 539    float rho;
 540
 541    rho = MAX2(maxx, maxy);
 542    rho = MAX2(rho, maxz);
 543
 544    return util_fast_log2(rho);
 545 }
 546
 547
 548 /**
 549  * Compute lambda for a vertex texture sampler.
 550  * Since there aren't derivatives to use, just return 0.
 551  */
 552 static float
 553 compute_lambda_vert(const struct sp_sampler_view *sview,
 554                     const float s[TGSI_QUAD_SIZE],
 555                     const float t[TGSI_QUAD_SIZE],
 556                     const float p[TGSI_QUAD_SIZE])
 557 {
 558    return 0.0f;
 559 }
 560
 561
 562
 563 /**
 564  * Get a texel from a texture, using the texture tile cache.
 565  *
 566  * \param addr  the template tex address containing cube, z, face info.
 567  * \param x  the x coord of texel within 2D image
 568  * \param y  the y coord of texel within 2D image
 569  * \param rgba  the quad to put the texel/color into
 570  *
 571  * XXX maybe move this into sp_tex_tile_cache.c and merge with the
 572  * sp_get_cached_tile_tex() function.
 573  */
 574
 575
 576
 577
 578 static INLINE const float *
 579 get_texel_2d_no_border(const struct sp_sampler_view *sp_sview,
 580                        union tex_tile_address addr, int x, int y)
 581 {
 582    const struct softpipe_tex_cached_tile *tile;
 583    addr.bits.x = x / TEX_TILE_SIZE;
 584    addr.bits.y = y / TEX_TILE_SIZE;
 585    y %= TEX_TILE_SIZE;
 586    x %= TEX_TILE_SIZE;
 587
 588    tile = sp_get_cached_tile_tex(sp_sview->cache, addr);
 589
 590    return &tile->data.color[y][x][0];
 591 }
 592
 593
 594 static INLINE const float *
 595 get_texel_2d(const struct sp_sampler_view *sp_sview,
 596              const struct sp_sampler *sp_samp,
 597              union tex_tile_address addr, int x, int y)
 598 {
 599    const struct pipe_resource *texture = sp_sview->base.texture;
 600    unsigned level = addr.bits.level;
 601
 602    if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
 603        y < 0 || y >= (int) u_minify(texture->height0, level)) {
 604       return sp_samp->base.border_color.f;
 605    }
 606    else {
 607       return get_texel_2d_no_border( sp_sview, addr, x, y );
 608    }
 609 }
 610
 611
/*
 * Here's the complete logic for finding the next cube face and doing the
 * corresponding coord wrapping, as implemented by get_next_face,
 * get_next_xcoord and get_next_ycoord.
 * Each line of the table reads as follows (taking the first line as an
 * example): if the face is +x and the s coord is below zero, then
 * the new face is +z, the new s is max and the new t is the old t
 * (max is always cube size - 1).
 *
 * +x s- -> +z: s = max,   t = t
 * +x s+ -> -z: s = 0,     t = t
 * +x t- -> +y: s = max,   t = max-s
 * +x t+ -> -y: s = max,   t = s
 *
 * -x s- -> -z: s = max,   t = t
 * -x s+ -> +z: s = 0,     t = t
 * -x t- -> +y: s = 0,     t = s
 * -x t+ -> -y: s = 0,     t = max-s
 *
 * +y s- -> -x: s = t,     t = 0
 * +y s+ -> +x: s = max-t, t = 0
 * +y t- -> -z: s = max-s, t = 0
 * +y t+ -> +z: s = s,     t = 0
 *
 * -y s- -> -x: s = max-t, t = max
 * -y s+ -> +x: s = t,     t = max
 * -y t- -> +z: s = s,     t = max
 * -y t+ -> -z: s = max-s, t = max
 *
 * +z s- -> -x: s = max,   t = t
 * +z s+ -> +x: s = 0,     t = t
 * +z t- -> +y: s = s,     t = max
 * +z t+ -> -y: s = s,     t = 0
 *
 * -z s- -> +x: s = max,   t = t
 * -z s+ -> -x: s = 0,     t = t
 * -z t- -> +y: s = max-s, t = 0
 * -z t+ -> -y: s = max-s, t = max
 */
 651
 652
 653 /*
 654  * seamless cubemap neighbour array.
 655  * this array is used to find the adjacent face in each of 4 directions,
 656  * left, right, up, down. (or -x, +x, -y, +y).
 657  */
 658 static const unsigned face_array[PIPE_TEX_FACE_MAX][4] = {
 659    /* pos X first then neg X is Z different, Y the same */
 660    /* PIPE_TEX_FACE_POS_X,*/
 661    { PIPE_TEX_FACE_POS_Z, PIPE_TEX_FACE_NEG_Z,
 662      PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y },
 663    /* PIPE_TEX_FACE_NEG_X */
 664    { PIPE_TEX_FACE_NEG_Z, PIPE_TEX_FACE_POS_Z,
 665      PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y },
 666
 667    /* pos Y first then neg Y is X different, X the same */
 668    /* PIPE_TEX_FACE_POS_Y */
 669    { PIPE_TEX_FACE_NEG_X, PIPE_TEX_FACE_POS_X,
 670      PIPE_TEX_FACE_NEG_Z, PIPE_TEX_FACE_POS_Z },
 671
 672    /* PIPE_TEX_FACE_NEG_Y */
 673    { PIPE_TEX_FACE_NEG_X, PIPE_TEX_FACE_POS_X,
 674      PIPE_TEX_FACE_POS_Z, PIPE_TEX_FACE_NEG_Z },
 675
 676    /* pos Z first then neg Y is X different, X the same */
 677    /* PIPE_TEX_FACE_POS_Z */
 678    { PIPE_TEX_FACE_NEG_X, PIPE_TEX_FACE_POS_X,
 679      PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y },
 680
 681    /* PIPE_TEX_FACE_NEG_Z */
 682    { PIPE_TEX_FACE_POS_X, PIPE_TEX_FACE_NEG_X,
 683      PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y }
 684 };
 685
 686 static INLINE unsigned
 687 get_next_face(unsigned face, int idx)
 688 {
 689    return face_array[face][idx];
 690 }
 691
 692 /*
 693  * return a new xcoord based on old face, old coords, cube size
 694  * and fall_off_index (0 for x-, 1 for x+, 2 for y-, 3 for y+)
 695  */
 696 static INLINE int
 697 get_next_xcoord(unsigned face, unsigned fall_off_index, int max, int xc, int yc)
 698 {
 699    if ((face == 0 && fall_off_index != 1) ||
 700        (face == 1 && fall_off_index == 0) ||
 701        (face == 4 && fall_off_index == 0) ||
 702        (face == 5 && fall_off_index == 0)) {
 703       return max;
 704    }
 705    if ((face == 1 && fall_off_index != 0) ||
 706        (face == 0 && fall_off_index == 1) ||
 707        (face == 4 && fall_off_index == 1) ||
 708        (face == 5 && fall_off_index == 1)) {
 709       return 0;
 710    }
 711    if ((face == 4 && fall_off_index >= 2) ||
 712        (face == 2 && fall_off_index == 3) ||
 713        (face == 3 && fall_off_index == 2)) {
 714       return xc;
 715    }
 716    if ((face == 5 && fall_off_index >= 2) ||
 717        (face == 2 && fall_off_index == 2) ||
 718        (face == 3 && fall_off_index == 3)) {
 719       return max - xc;
 720    }
 721    if ((face == 2 && fall_off_index == 0) ||
 722        (face == 3 && fall_off_index == 1)) {
 723       return yc;
 724    }
 725    /* (face == 2 && fall_off_index == 1) ||
 726       (face == 3 && fall_off_index == 0)) */
 727    return max - yc;
 728 }
 729
 730 /*
 731  * return a new ycoord based on old face, old coords, cube size
 732  * and fall_off_index (0 for x-, 1 for x+, 2 for y-, 3 for y+)
 733  */
 734 static INLINE int
 735 get_next_ycoord(unsigned face, unsigned fall_off_index, int max, int xc, int yc)
 736 {
 737    if ((fall_off_index <= 1) && (face <= 1 || face >= 4)) {
 738       return yc;
 739    }
 740    if (face == 2 ||
 741        (face == 4 && fall_off_index == 3) ||
 742        (face == 5 && fall_off_index == 2)) {
 743       return 0;
 744    }
 745    if (face == 3 ||
 746        (face == 4 && fall_off_index == 2) ||
 747        (face == 5 && fall_off_index == 3)) {
 748       return max;
 749    }
 750    if ((face == 0 && fall_off_index == 3) ||
 751        (face == 1 && fall_off_index == 2)) {
 752       return xc;
 753    }
 754    /* (face == 0 && fall_off_index == 2) ||
 755       (face == 1 && fall_off_index == 3) */
 756    return max - xc;
 757 }
 758
 759
 760 static INLINE const float *
 761 get_texel_cube_seamless(const struct sp_sampler_view *sp_sview,
 762                         union tex_tile_address addr, int x, int y,
 763                         float *corner)
 764 {
 765    const struct pipe_resource *texture = sp_sview->base.texture;
 766    unsigned level = addr.bits.level;
 767    unsigned face = addr.bits.face;
 768    int new_x, new_y, max_x;
 769
 770    max_x = (int) u_minify(texture->width0, level);
 771
 772    assert(texture->width0 == texture->height0);
 773    new_x = x;
 774    new_y = y;
 775
 776    /* change the face */
 777    if (x < 0) {
 778       /*
 779        * Cheat with corners. They are difficult and I believe because we don't get
 780        * per-pixel faces we can actually have multiple corner texels per pixel,
 781        * which screws things up majorly in any case (as the per spec behavior is
 782        * to average the 3 remaining texels, which we might not have).
 783        * Hence just make sure that the 2nd coord is clamped, will simply pick the
 784        * sample which would have fallen off the x coord, but not y coord.
 785        * So the filter weight of the samples will be wrong, but at least this
 786        * ensures that only valid texels near the corner are used.
 787        */
 788       if (y < 0 || y >= max_x) {
 789          y = CLAMP(y, 0, max_x - 1);
 790       }
 791       new_x = get_next_xcoord(face, 0, max_x -1, x, y);
 792       new_y = get_next_ycoord(face, 0, max_x -1, x, y);
 793       face = get_next_face(face, 0);
 794    } else if (x >= max_x) {
 795       if (y < 0 || y >= max_x) {
 796          y = CLAMP(y, 0, max_x - 1);
 797       }
 798       new_x = get_next_xcoord(face, 1, max_x -1, x, y);
 799       new_y = get_next_ycoord(face, 1, max_x -1, x, y);
 800       face = get_next_face(face, 1);
 801    } else if (y < 0) {
 802       new_x = get_next_xcoord(face, 2, max_x -1, x, y);
 803       new_y = get_next_ycoord(face, 2, max_x -1, x, y);
 804       face = get_next_face(face, 2);
 805    } else if (y >= max_x) {
 806       new_x = get_next_xcoord(face, 3, max_x -1, x, y);
 807       new_y = get_next_ycoord(face, 3, max_x -1, x, y);
 808       face = get_next_face(face, 3);
 809    }
 810
 811    addr.bits.face = face;
 812    return get_texel_2d_no_border( sp_sview, addr, new_x, new_y );
 813 }
 814
 815 /* Gather a quad of adjacent texels within a tile:
 816  */
 817 static INLINE void
 818 get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_view *sp_sview,
 819                                         union tex_tile_address addr,
 820                                         unsigned x, unsigned y,
 821                                         const float *out[4])
 822 {
 823     const struct softpipe_tex_cached_tile *tile;
 824
 825    addr.bits.x = x / TEX_TILE_SIZE;
 826    addr.bits.y = y / TEX_TILE_SIZE;
 827    y %= TEX_TILE_SIZE;
 828    x %= TEX_TILE_SIZE;
 829
 830    tile = sp_get_cached_tile_tex(sp_sview->cache, addr);
 831
 832    out[0] = &tile->data.color[y  ][x  ][0];
 833    out[1] = &tile->data.color[y  ][x+1][0];
 834    out[2] = &tile->data.color[y+1][x  ][0];
 835    out[3] = &tile->data.color[y+1][x+1][0];
 836 }
 837
 838
 839 /* Gather a quad of potentially non-adjacent texels:
 840  */
 841 static INLINE void
 842 get_texel_quad_2d_no_border(const struct sp_sampler_view *sp_sview,
 843                             union tex_tile_address addr,
 844                             int x0, int y0,
 845                             int x1, int y1,
 846                             const float *out[4])
 847 {
 848    out[0] = get_texel_2d_no_border( sp_sview, addr, x0, y0 );
 849    out[1] = get_texel_2d_no_border( sp_sview, addr, x1, y0 );
 850    out[2] = get_texel_2d_no_border( sp_sview, addr, x0, y1 );
 851    out[3] = get_texel_2d_no_border( sp_sview, addr, x1, y1 );
 852 }
 853
 854 /* Can involve a lot of unnecessary checks for border color:
 855  */
 856 static INLINE void
 857 get_texel_quad_2d(const struct sp_sampler_view *sp_sview,
 858                   const struct sp_sampler *sp_samp,
 859                   union tex_tile_address addr,
 860                   int x0, int y0,
 861                   int x1, int y1,
 862                   const float *out[4])
 863 {
 864    out[0] = get_texel_2d( sp_sview, sp_samp, addr, x0, y0 );
 865    out[1] = get_texel_2d( sp_sview, sp_samp, addr, x1, y0 );
 866    out[3] = get_texel_2d( sp_sview, sp_samp, addr, x1, y1 );
 867    out[2] = get_texel_2d( sp_sview, sp_samp, addr, x0, y1 );
 868 }
 869
 870
 871
 872 /* 3d variants:
 873  */
 874 static INLINE const float *
 875 get_texel_3d_no_border(const struct sp_sampler_view *sp_sview,
 876                        union tex_tile_address addr, int x, int y, int z)
 877 {
 878    const struct softpipe_tex_cached_tile *tile;
 879
 880    addr.bits.x = x / TEX_TILE_SIZE;
 881    addr.bits.y = y / TEX_TILE_SIZE;
 882    addr.bits.z = z;
 883    y %= TEX_TILE_SIZE;
 884    x %= TEX_TILE_SIZE;
 885
 886    tile = sp_get_cached_tile_tex(sp_sview->cache, addr);
 887
 888    return &tile->data.color[y][x][0];
 889 }
 890
 891
 892 static INLINE const float *
 893 get_texel_3d(const struct sp_sampler_view *sp_sview,
 894              const struct sp_sampler *sp_samp,
 895              union tex_tile_address addr, int x, int y, int z)
 896 {
 897    const struct pipe_resource *texture = sp_sview->base.texture;
 898    unsigned level = addr.bits.level;
 899
 900    if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
 901        y < 0 || y >= (int) u_minify(texture->height0, level) ||
 902        z < 0 || z >= (int) u_minify(texture->depth0, level)) {
 903       return sp_samp->base.border_color.f;
 904    }
 905    else {
 906       return get_texel_3d_no_border( sp_sview, addr, x, y, z );
 907    }
 908 }
 909
 910
 911 /* Get texel pointer for 1D array texture */
 912 static INLINE const float *
 913 get_texel_1d_array(const struct sp_sampler_view *sp_sview,
 914                    const struct sp_sampler *sp_samp,
 915                    union tex_tile_address addr, int x, int y)
 916 {
 917    const struct pipe_resource *texture = sp_sview->base.texture;
 918    unsigned level = addr.bits.level;
 919
 920    if (x < 0 || x >= (int) u_minify(texture->width0, level)) {
 921       return sp_samp->base.border_color.f;
 922    }
 923    else {
 924       return get_texel_2d_no_border(sp_sview, addr, x, y);
 925    }
 926 }
 927
 928
 929 /* Get texel pointer for 2D array texture */
 930 static INLINE const float *
 931 get_texel_2d_array(const struct sp_sampler_view *sp_sview,
 932                    const struct sp_sampler *sp_samp,
 933                    union tex_tile_address addr, int x, int y, int layer)
 934 {
 935    const struct pipe_resource *texture = sp_sview->base.texture;
 936    unsigned level = addr.bits.level;
 937
 938    assert(layer < (int) texture->array_size);
 939    assert(layer >= 0);
 940
 941    if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
 942        y < 0 || y >= (int) u_minify(texture->height0, level)) {
 943       return sp_samp->base.border_color.f;
 944    }
 945    else {
 946       return get_texel_3d_no_border(sp_sview, addr, x, y, layer);
 947    }
 948 }
 949
 950
 951 /* Get texel pointer for cube array texture */
 952 static INLINE const float *
 953 get_texel_cube_array(const struct sp_sampler_view *sp_sview,
 954                      const struct sp_sampler *sp_samp,
 955                      union tex_tile_address addr, int x, int y, int layer)
 956 {
 957    const struct pipe_resource *texture = sp_sview->base.texture;
 958    unsigned level = addr.bits.level;
 959
 960    assert(layer < (int) texture->array_size);
 961    assert(layer >= 0);
 962
 963    if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
 964        y < 0 || y >= (int) u_minify(texture->height0, level)) {
 965       return sp_samp->base.border_color.f;
 966    }
 967    else {
 968       return get_texel_3d_no_border(sp_sview, addr, x, y, layer);
 969    }
 970 }
 971 /**
 972  * Given the logbase2 of a mipmap's base level size and a mipmap level,
 973  * return the size (in texels) of that mipmap level.
 974  * For example, if level[0].width = 256 then base_pot will be 8.
 975  * If level = 2, then we'll return 64 (the width at level=2).
 976  * Return 1 if level > base_pot.
 977  */
 978 static INLINE unsigned
 979 pot_level_size(unsigned base_pot, unsigned level)
 980 {
 981    return (base_pot >= level) ? (1 << (base_pot - level)) : 1;
 982 }
 983
 984
 985 static void
 986 print_sample(const char *function, const float *rgba)
 987 {
 988    debug_printf("%s %g %g %g %g\n",
 989                 function,
 990                 rgba[0], rgba[TGSI_NUM_CHANNELS], rgba[2*TGSI_NUM_CHANNELS], rgba[3*TGSI_NUM_CHANNELS]);
 991 }
 992
 993
 994 static void
 995 print_sample_4(const char *function, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
 996 {
 997    debug_printf("%s %g %g %g %g, %g %g %g %g, %g %g %g %g, %g %g %g %g\n",
 998                 function,
 999                 rgba[0][0], rgba[1][0], rgba[2][0], rgba[3][0],
1000                 rgba[0][1], rgba[1][1], rgba[2][1], rgba[3][1],
1001                 rgba[0][2], rgba[1][2], rgba[2][2], rgba[3][2],
1002                 rgba[0][3], rgba[1][3], rgba[2][3], rgba[3][3]);
1003 }
1004
1005
1006 /* Some image-filter fastpaths:
1007  */
1008 static INLINE void
1009 img_filter_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview,
1010                                 struct sp_sampler *sp_samp,
1011                                 float s,
1012                                 float t,
1013                                 float p,
1014                                 unsigned level,
1015                                 unsigned face_id,
1016                                 float *rgba)
1017 {
1018    unsigned xpot = pot_level_size(sp_sview->xpot, level);
1019    unsigned ypot = pot_level_size(sp_sview->ypot, level);
1020    int xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */
1021    int ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */
1022    union tex_tile_address addr;
1023    int c;
1024
1025    float u = s * xpot - 0.5F;
1026    float v = t * ypot - 0.5F;
1027
1028    int uflr = util_ifloor(u);
1029    int vflr = util_ifloor(v);
1030
1031    float xw = u - (float)uflr;
1032    float yw = v - (float)vflr;
1033
1034    int x0 = uflr & (xpot - 1);
1035    int y0 = vflr & (ypot - 1);
1036
1037    const float *tx[4];
1038
1039    addr.value = 0;
1040    addr.bits.level = level;
1041
1042    /* Can we fetch all four at once:
1043     */
1044    if (x0 < xmax && y0 < ymax) {
1045       get_texel_quad_2d_no_border_single_tile(sp_sview, addr, x0, y0, tx);
1046    }
1047    else {
1048       unsigned x1 = (x0 + 1) & (xpot - 1);
1049       unsigned y1 = (y0 + 1) & (ypot - 1);
1050       get_texel_quad_2d_no_border(sp_sview, addr, x0, y0, x1, y1, tx);
1051    }
1052
1053    /* interpolate R, G, B, A */
1054    for (c = 0; c < TGSI_QUAD_SIZE; c++) {
1055       rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1056                                        tx[0][c], tx[1][c],
1057                                        tx[2][c], tx[3][c]);
1058    }
1059
1060    if (DEBUG_TEX) {
1061       print_sample(__FUNCTION__, rgba);
1062    }
1063 }
1064
1065
1066 static INLINE void
1067 img_filter_2d_nearest_repeat_POT(struct sp_sampler_view *sp_sview,
1068                                  struct sp_sampler *sp_samp,
1069                                  float s,
1070                                  float t,
1071                                  float p,
1072                                  unsigned level,
1073                                  unsigned face_id,
1074                                  float rgba[TGSI_QUAD_SIZE])
1075 {
1076    unsigned xpot = pot_level_size(sp_sview->xpot, level);
1077    unsigned ypot = pot_level_size(sp_sview->ypot, level);
1078    const float *out;
1079    union tex_tile_address addr;
1080    int c;
1081
1082    float u = s * xpot;
1083    float v = t * ypot;
1084
1085    int uflr = util_ifloor(u);
1086    int vflr = util_ifloor(v);
1087
1088    int x0 = uflr & (xpot - 1);
1089    int y0 = vflr & (ypot - 1);
1090
1091    addr.value = 0;
1092    addr.bits.level = level;
1093
1094    out = get_texel_2d_no_border(sp_sview, addr, x0, y0);
1095    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1096       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1097
1098    if (DEBUG_TEX) {
1099       print_sample(__FUNCTION__, rgba);
1100    }
1101 }
1102
1103
1104 static INLINE void
1105 img_filter_2d_nearest_clamp_POT(struct sp_sampler_view *sp_sview,
1106                                 struct sp_sampler *sp_samp,
1107                                 float s,
1108                                 float t,
1109                                 float p,
1110                                 unsigned level,
1111                                 unsigned face_id,
1112                                 float rgba[TGSI_QUAD_SIZE])
1113 {
1114    unsigned xpot = pot_level_size(sp_sview->xpot, level);
1115    unsigned ypot = pot_level_size(sp_sview->ypot, level);
1116    union tex_tile_address addr;
1117    int c;
1118
1119    float u = s * xpot;
1120    float v = t * ypot;
1121
1122    int x0, y0;
1123    const float *out;
1124
1125    addr.value = 0;
1126    addr.bits.level = level;
1127
1128    x0 = util_ifloor(u);
1129    if (x0 < 0)
1130       x0 = 0;
1131    else if (x0 > (int) xpot - 1)
1132       x0 = xpot - 1;
1133
1134    y0 = util_ifloor(v);
1135    if (y0 < 0)
1136       y0 = 0;
1137    else if (y0 > (int) ypot - 1)
1138       y0 = ypot - 1;
1139
1140    out = get_texel_2d_no_border(sp_sview, addr, x0, y0);
1141    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1142       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1143
1144    if (DEBUG_TEX) {
1145       print_sample(__FUNCTION__, rgba);
1146    }
1147 }
1148
1149
1150 static void
1151 img_filter_1d_nearest(struct sp_sampler_view *sp_sview,
1152                       struct sp_sampler *sp_samp,
1153                       float s,
1154                       float t,
1155                       float p,
1156                       unsigned level,
1157                       unsigned face_id,
1158                       float rgba[TGSI_QUAD_SIZE])
1159 {
1160    const struct pipe_resource *texture = sp_sview->base.texture;
1161    int width;
1162    int x;
1163    union tex_tile_address addr;
1164    const float *out;
1165    int c;
1166
1167    width = u_minify(texture->width0, level);
1168
1169    assert(width > 0);
1170
1171    addr.value = 0;
1172    addr.bits.level = level;
1173
1174    sp_samp->nearest_texcoord_s(s, width, &x);
1175
1176    out = get_texel_2d(sp_sview, sp_samp, addr, x, 0);
1177    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1178       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1179
1180    if (DEBUG_TEX) {
1181       print_sample(__FUNCTION__, rgba);
1182    }
1183 }
1184
1185
1186 static void
1187 img_filter_1d_array_nearest(struct sp_sampler_view *sp_sview,
1188                             struct sp_sampler *sp_samp,
1189                             float s,
1190                             float t,
1191                             float p,
1192                             unsigned level,
1193                             unsigned face_id,
1194                             float *rgba)
1195 {
1196    const struct pipe_resource *texture = sp_sview->base.texture;
1197    int width;
1198    int x, layer;
1199    union tex_tile_address addr;
1200    const float *out;
1201    int c;
1202
1203    width = u_minify(texture->width0, level);
1204
1205    assert(width > 0);
1206
1207    addr.value = 0;
1208    addr.bits.level = level;
1209
1210    sp_samp->nearest_texcoord_s(s, width, &x);
1211    wrap_array_layer(t, texture->array_size, &layer);
1212
1213    out = get_texel_1d_array(sp_sview, sp_samp, addr, x, layer);
1214    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1215       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1216
1217    if (DEBUG_TEX) {
1218       print_sample(__FUNCTION__, rgba);
1219    }
1220 }
1221
1222
1223 static void
1224 img_filter_2d_nearest(struct sp_sampler_view *sp_sview,
1225                       struct sp_sampler *sp_samp,
1226                       float s,
1227                       float t,
1228                       float p,
1229                       unsigned level,
1230                       unsigned face_id,
1231                       float *rgba)
1232 {
1233    const struct pipe_resource *texture = sp_sview->base.texture;
1234    int width, height;
1235    int x, y;
1236    union tex_tile_address addr;
1237    const float *out;
1238    int c;
1239
1240    width = u_minify(texture->width0, level);
1241    height = u_minify(texture->height0, level);
1242
1243    assert(width > 0);
1244    assert(height > 0);
1245
1246    addr.value = 0;
1247    addr.bits.level = level;
1248
1249    sp_samp->nearest_texcoord_s(s, width, &x);
1250    sp_samp->nearest_texcoord_t(t, height, &y);
1251
1252    out = get_texel_2d(sp_sview, sp_samp, addr, x, y);
1253    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1254       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1255
1256    if (DEBUG_TEX) {
1257       print_sample(__FUNCTION__, rgba);
1258    }
1259 }
1260
1261
1262 static void
1263 img_filter_2d_array_nearest(struct sp_sampler_view *sp_sview,
1264                             struct sp_sampler *sp_samp,
1265                             float s,
1266                             float t,
1267                             float p,
1268                             unsigned level,
1269                             unsigned face_id,
1270                             float *rgba)
1271 {
1272    const struct pipe_resource *texture = sp_sview->base.texture;
1273    int width, height;
1274    int x, y, layer;
1275    union tex_tile_address addr;
1276    const float *out;
1277    int c;
1278
1279    width = u_minify(texture->width0, level);
1280    height = u_minify(texture->height0, level);
1281
1282    assert(width > 0);
1283    assert(height > 0);
1284
1285    addr.value = 0;
1286    addr.bits.level = level;
1287
1288    sp_samp->nearest_texcoord_s(s, width, &x);
1289    sp_samp->nearest_texcoord_t(t, height, &y);
1290    wrap_array_layer(p, texture->array_size, &layer);
1291
1292    out = get_texel_2d_array(sp_sview, sp_samp, addr, x, y, layer);
1293    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1294       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1295
1296    if (DEBUG_TEX) {
1297       print_sample(__FUNCTION__, rgba);
1298    }
1299 }
1300
1301
1302 static INLINE union tex_tile_address
1303 face(union tex_tile_address addr, unsigned face )
1304 {
1305    addr.bits.face = face;
1306    return addr;
1307 }
1308
1309
1310 static void
1311 img_filter_cube_nearest(struct sp_sampler_view *sp_sview,
1312                         struct sp_sampler *sp_samp,
1313                         float s,
1314                         float t,
1315                         float p,
1316                         unsigned level,
1317                         unsigned face_id,
1318                         float *rgba)
1319 {
1320    const struct pipe_resource *texture = sp_sview->base.texture;
1321    int width, height;
1322    int x, y;
1323    union tex_tile_address addr;
1324    const float *out;
1325    int c;
1326
1327    width = u_minify(texture->width0, level);
1328    height = u_minify(texture->height0, level);
1329
1330    assert(width > 0);
1331    assert(height > 0);
1332
1333    addr.value = 0;
1334    addr.bits.level = level;
1335
1336    /*
1337     * If NEAREST filtering is done within a miplevel, always apply wrap
1338     * mode CLAMP_TO_EDGE.
1339     */
1340    if (sp_samp->base.seamless_cube_map) {
1341       wrap_nearest_clamp_to_edge(s, width, &x);
1342       wrap_nearest_clamp_to_edge(t, height, &y);
1343    } else {
1344       /* Would probably make sense to ignore mode and just do edge clamp */
1345       sp_samp->nearest_texcoord_s(s, width, &x);
1346       sp_samp->nearest_texcoord_t(t, height, &y);
1347    }
1348
1349    out = get_texel_2d(sp_sview, sp_samp, face(addr, face_id), x, y);
1350    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1351       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1352
1353    if (DEBUG_TEX) {
1354       print_sample(__FUNCTION__, rgba);
1355    }
1356 }
1357
1358 static void
1359 img_filter_cube_array_nearest(struct sp_sampler_view *sp_sview,
1360                               struct sp_sampler *sp_samp,
1361                               float s,
1362                               float t,
1363                               float p,
1364                               unsigned level,
1365                               unsigned face_id,
1366                               float *rgba)
1367 {
1368    const struct pipe_resource *texture = sp_sview->base.texture;
1369    int width, height;
1370    int x, y, layer;
1371    union tex_tile_address addr;
1372    const float *out;
1373    int c;
1374
1375    width = u_minify(texture->width0, level);
1376    height = u_minify(texture->height0, level);
1377
1378    assert(width > 0);
1379    assert(height > 0);
1380
1381    addr.value = 0;
1382    addr.bits.level = level;
1383
1384    sp_samp->nearest_texcoord_s(s, width, &x);
1385    sp_samp->nearest_texcoord_t(t, height, &y);
1386    wrap_array_layer(p, texture->array_size, &layer);
1387
1388    out = get_texel_cube_array(sp_sview, sp_samp, addr, x, y, layer * 6 + face_id);
1389    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1390       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1391
1392    if (DEBUG_TEX) {
1393       print_sample(__FUNCTION__, rgba);
1394    }
1395 }
1396
1397 static void
1398 img_filter_3d_nearest(struct sp_sampler_view *sp_sview,
1399                       struct sp_sampler *sp_samp,
1400                       float s,
1401                       float t,
1402                       float p,
1403                       unsigned level,
1404                       unsigned face_id,
1405                       float *rgba)
1406 {
1407    const struct pipe_resource *texture = sp_sview->base.texture;
1408    int width, height, depth;
1409    int x, y, z;
1410    union tex_tile_address addr;
1411    const float *out;
1412    int c;
1413
1414    width = u_minify(texture->width0, level);
1415    height = u_minify(texture->height0, level);
1416    depth = u_minify(texture->depth0, level);
1417
1418    assert(width > 0);
1419    assert(height > 0);
1420    assert(depth > 0);
1421
1422    sp_samp->nearest_texcoord_s(s, width,  &x);
1423    sp_samp->nearest_texcoord_t(t, height, &y);
1424    sp_samp->nearest_texcoord_p(p, depth,  &z);
1425
1426    addr.value = 0;
1427    addr.bits.level = level;
1428
1429    out = get_texel_3d(sp_sview, sp_samp, addr, x, y, z);
1430    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1431       rgba[TGSI_NUM_CHANNELS*c] = out[c];
1432 }
1433
1434
1435 static void
1436 img_filter_1d_linear(struct sp_sampler_view *sp_sview,
1437                      struct sp_sampler *sp_samp,
1438                      float s,
1439                      float t,
1440                      float p,
1441                      unsigned level,
1442                      unsigned face_id,
1443                      float *rgba)
1444 {
1445    const struct pipe_resource *texture = sp_sview->base.texture;
1446    int width;
1447    int x0, x1;
1448    float xw; /* weights */
1449    union tex_tile_address addr;
1450    const float *tx0, *tx1;
1451    int c;
1452
1453    width = u_minify(texture->width0, level);
1454
1455    assert(width > 0);
1456
1457    addr.value = 0;
1458    addr.bits.level = level;
1459
1460    sp_samp->linear_texcoord_s(s, width, &x0, &x1, &xw);
1461
1462    tx0 = get_texel_2d(sp_sview, sp_samp, addr, x0, 0);
1463    tx1 = get_texel_2d(sp_sview, sp_samp, addr, x1, 0);
1464
1465    /* interpolate R, G, B, A */
1466    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1467       rgba[TGSI_NUM_CHANNELS*c] = lerp(xw, tx0[c], tx1[c]);
1468 }
1469
1470
1471 static void
1472 img_filter_1d_array_linear(struct sp_sampler_view *sp_sview,
1473                            struct sp_sampler *sp_samp,
1474                            float s,
1475                            float t,
1476                            float p,
1477                            unsigned level,
1478                            unsigned face_id,
1479                            float *rgba)
1480 {
1481    const struct pipe_resource *texture = sp_sview->base.texture;
1482    int width;
1483    int x0, x1, layer;
1484    float xw; /* weights */
1485    union tex_tile_address addr;
1486    const float *tx0, *tx1;
1487    int c;
1488
1489    width = u_minify(texture->width0, level);
1490
1491    assert(width > 0);
1492
1493    addr.value = 0;
1494    addr.bits.level = level;
1495
1496    sp_samp->linear_texcoord_s(s, width, &x0, &x1, &xw);
1497    wrap_array_layer(t, texture->array_size, &layer);
1498
1499    tx0 = get_texel_1d_array(sp_sview, sp_samp, addr, x0, layer);
1500    tx1 = get_texel_1d_array(sp_sview, sp_samp, addr, x1, layer);
1501
1502    /* interpolate R, G, B, A */
1503    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1504       rgba[TGSI_NUM_CHANNELS*c] = lerp(xw, tx0[c], tx1[c]);
1505 }
1506
1507
1508 static void
1509 img_filter_2d_linear(struct sp_sampler_view *sp_sview,
1510                      struct sp_sampler *sp_samp,
1511                      float s,
1512                      float t,
1513                      float p,
1514                      unsigned level,
1515                      unsigned face_id,
1516                      float *rgba)
1517 {
1518    const struct pipe_resource *texture = sp_sview->base.texture;
1519    int width, height;
1520    int x0, y0, x1, y1;
1521    float xw, yw; /* weights */
1522    union tex_tile_address addr;
1523    const float *tx0, *tx1, *tx2, *tx3;
1524    int c;
1525
1526    width = u_minify(texture->width0, level);
1527    height = u_minify(texture->height0, level);
1528
1529    assert(width > 0);
1530    assert(height > 0);
1531
1532    addr.value = 0;
1533    addr.bits.level = level;
1534
1535    sp_samp->linear_texcoord_s(s, width,  &x0, &x1, &xw);
1536    sp_samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
1537
1538    tx0 = get_texel_2d(sp_sview, sp_samp, addr, x0, y0);
1539    tx1 = get_texel_2d(sp_sview, sp_samp, addr, x1, y0);
1540    tx2 = get_texel_2d(sp_sview, sp_samp, addr, x0, y1);
1541    tx3 = get_texel_2d(sp_sview, sp_samp, addr, x1, y1);
1542
1543    /* interpolate R, G, B, A */
1544    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1545       rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1546                                           tx0[c], tx1[c],
1547                                           tx2[c], tx3[c]);
1548 }
1549
1550
1551 static void
1552 img_filter_2d_array_linear(struct sp_sampler_view *sp_sview,
1553                            struct sp_sampler *sp_samp,
1554                            float s,
1555                            float t,
1556                            float p,
1557                            unsigned level,
1558                            unsigned face_id,
1559                            float *rgba)
1560 {
1561    const struct pipe_resource *texture = sp_sview->base.texture;
1562    int width, height;
1563    int x0, y0, x1, y1, layer;
1564    float xw, yw; /* weights */
1565    union tex_tile_address addr;
1566    const float *tx0, *tx1, *tx2, *tx3;
1567    int c;
1568
1569    width = u_minify(texture->width0, level);
1570    height = u_minify(texture->height0, level);
1571
1572    assert(width > 0);
1573    assert(height > 0);
1574
1575    addr.value = 0;
1576    addr.bits.level = level;
1577
1578    sp_samp->linear_texcoord_s(s, width,  &x0, &x1, &xw);
1579    sp_samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
1580    wrap_array_layer(p, texture->array_size, &layer);
1581
1582    tx0 = get_texel_2d_array(sp_sview, sp_samp, addr, x0, y0, layer);
1583    tx1 = get_texel_2d_array(sp_sview, sp_samp, addr, x1, y0, layer);
1584    tx2 = get_texel_2d_array(sp_sview, sp_samp, addr, x0, y1, layer);
1585    tx3 = get_texel_2d_array(sp_sview, sp_samp, addr, x1, y1, layer);
1586
1587    /* interpolate R, G, B, A */
1588    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1589       rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1590                                           tx0[c], tx1[c],
1591                                           tx2[c], tx3[c]);
1592 }
1593
1594
1595 static void
1596 img_filter_cube_linear(struct sp_sampler_view *sp_sview,
1597                        struct sp_sampler *sp_samp,
1598                        float s,
1599                        float t,
1600                        float p,
1601                        unsigned level,
1602                        unsigned face_id,
1603                        float *rgba)
1604 {
1605    const struct pipe_resource *texture = sp_sview->base.texture;
1606    int width, height;
1607    int x0, y0, x1, y1;
1608    float xw, yw; /* weights */
1609    union tex_tile_address addr, addrj;
1610    const float *tx0, *tx1, *tx2, *tx3;
1611    float corner0[TGSI_QUAD_SIZE], corner1[TGSI_QUAD_SIZE],
1612          corner2[TGSI_QUAD_SIZE], corner3[TGSI_QUAD_SIZE];
1613    int c;
1614
1615    width = u_minify(texture->width0, level);
1616    height = u_minify(texture->height0, level);
1617
1618    assert(width > 0);
1619    assert(height > 0);
1620
1621    addr.value = 0;
1622    addr.bits.level = level;
1623
1624    /*
1625     * For seamless if LINEAR filtering is done within a miplevel,
1626     * always apply wrap mode CLAMP_TO_BORDER.
1627     */
1628    if (sp_samp->base.seamless_cube_map) {
1629       /* Note this is a bit overkill, actual clamping is not required */
1630       wrap_linear_clamp_to_border(s, width, &x0, &x1, &xw);
1631       wrap_linear_clamp_to_border(t, height, &y0, &y1, &yw);
1632    } else {
1633       /* Would probably make sense to ignore mode and just do edge clamp */
1634       sp_samp->linear_texcoord_s(s, width,  &x0, &x1, &xw);
1635       sp_samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
1636    }
1637
1638    addrj = face(addr, face_id);
1639
1640    if (sp_samp->base.seamless_cube_map) {
1641       tx0 = get_texel_cube_seamless(sp_sview, addrj, x0, y0, corner0);
1642       tx1 = get_texel_cube_seamless(sp_sview, addrj, x1, y0, corner1);
1643       tx2 = get_texel_cube_seamless(sp_sview, addrj, x0, y1, corner2);
1644       tx3 = get_texel_cube_seamless(sp_sview, addrj, x1, y1, corner3);
1645    } else {
1646       tx0 = get_texel_2d(sp_sview, sp_samp, addrj, x0, y0);
1647       tx1 = get_texel_2d(sp_sview, sp_samp, addrj, x1, y0);
1648       tx2 = get_texel_2d(sp_sview, sp_samp, addrj, x0, y1);
1649       tx3 = get_texel_2d(sp_sview, sp_samp, addrj, x1, y1);
1650    }
1651    /* interpolate R, G, B, A */
1652    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1653       rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1654                                           tx0[c], tx1[c],
1655                                           tx2[c], tx3[c]);
1656 }
1657
1658
1659 static void
1660 img_filter_cube_array_linear(struct sp_sampler_view *sp_sview,
1661                              struct sp_sampler *sp_samp,
1662                              float s,
1663                              float t,
1664                              float p,
1665                              unsigned level,
1666                              unsigned face_id,
1667                              float *rgba)
1668 {
1669    const struct pipe_resource *texture = sp_sview->base.texture;
1670    int width, height;
1671    int x0, y0, x1, y1, layer;
1672    float xw, yw; /* weights */
1673    union tex_tile_address addr;
1674    const float *tx0, *tx1, *tx2, *tx3;
1675    int c;
1676
1677    width = u_minify(texture->width0, level);
1678    height = u_minify(texture->height0, level);
1679
1680    assert(width > 0);
1681    assert(height > 0);
1682
1683    addr.value = 0;
1684    addr.bits.level = level;
1685
1686    sp_samp->linear_texcoord_s(s, width,  &x0, &x1, &xw);
1687    sp_samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
1688    wrap_array_layer(p, texture->array_size, &layer);
1689
1690    tx0 = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y0, layer * 6 + face_id);
1691    tx1 = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y0, layer * 6 + face_id);
1692    tx2 = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y1, layer * 6 + face_id);
1693    tx3 = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer * 6 + face_id);
1694
1695    /* interpolate R, G, B, A */
1696    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1697       rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1698                                           tx0[c], tx1[c],
1699                                           tx2[c], tx3[c]);
1700 }
1701
1702 static void
1703 img_filter_3d_linear(struct sp_sampler_view *sp_sview,
1704                      struct sp_sampler *sp_samp,
1705                      float s,
1706                      float t,
1707                      float p,
1708                      unsigned level,
1709                      unsigned face_id,
1710                      float *rgba)
1711 {
1712    const struct pipe_resource *texture = sp_sview->base.texture;
1713    int width, height, depth;
1714    int x0, x1, y0, y1, z0, z1;
1715    float xw, yw, zw; /* interpolation weights */
1716    union tex_tile_address addr;
1717    const float *tx00, *tx01, *tx02, *tx03, *tx10, *tx11, *tx12, *tx13;
1718    int c;
1719
1720    width = u_minify(texture->width0, level);
1721    height = u_minify(texture->height0, level);
1722    depth = u_minify(texture->depth0, level);
1723
1724    addr.value = 0;
1725    addr.bits.level = level;
1726
1727    assert(width > 0);
1728    assert(height > 0);
1729    assert(depth > 0);
1730
1731    sp_samp->linear_texcoord_s(s, width,  &x0, &x1, &xw);
1732    sp_samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
1733    sp_samp->linear_texcoord_p(p, depth,  &z0, &z1, &zw);
1734
1735
1736    tx00 = get_texel_3d(sp_sview, sp_samp, addr, x0, y0, z0);
1737    tx01 = get_texel_3d(sp_sview, sp_samp, addr, x1, y0, z0);
1738    tx02 = get_texel_3d(sp_sview, sp_samp, addr, x0, y1, z0);
1739    tx03 = get_texel_3d(sp_sview, sp_samp, addr, x1, y1, z0);
1740
1741    tx10 = get_texel_3d(sp_sview, sp_samp, addr, x0, y0, z1);
1742    tx11 = get_texel_3d(sp_sview, sp_samp, addr, x1, y0, z1);
1743    tx12 = get_texel_3d(sp_sview, sp_samp, addr, x0, y1, z1);
1744    tx13 = get_texel_3d(sp_sview, sp_samp, addr, x1, y1, z1);
1745
1746       /* interpolate R, G, B, A */
1747    for (c = 0; c < TGSI_QUAD_SIZE; c++)
1748       rgba[TGSI_NUM_CHANNELS*c] =  lerp_3d(xw, yw, zw,
1749                                            tx00[c], tx01[c],
1750                                            tx02[c], tx03[c],
1751                                            tx10[c], tx11[c],
1752                                            tx12[c], tx13[c]);
1753 }
1754
1755
1756 /* Calculate level of detail for every fragment,
1757  * with lambda already computed.
1758  * Note that lambda has already been biased by global LOD bias.
1759  * \param biased_lambda per-quad lambda.
1760  * \param lod_in per-fragment lod_bias or explicit_lod.
1761  * \param lod returns the per-fragment lod.
1762  */
1763 static INLINE void
1764 compute_lod(const struct pipe_sampler_state *sampler,
1765             enum tgsi_sampler_control control,
1766             const float biased_lambda,
1767             const float lod_in[TGSI_QUAD_SIZE],
1768             float lod[TGSI_QUAD_SIZE])
1769 {
1770    float min_lod = sampler->min_lod;
1771    float max_lod = sampler->max_lod;
1772    uint i;
1773
1774    switch (control) {
1775    case tgsi_sampler_lod_none:
1776    case tgsi_sampler_lod_zero:
1777    /* XXX FIXME */
1778    case tgsi_sampler_derivs_explicit:
1779       lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(biased_lambda, min_lod, max_lod);
1780       break;
1781    case tgsi_sampler_lod_bias:
1782       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1783          lod[i] = biased_lambda + lod_in[i];
1784          lod[i] = CLAMP(lod[i], min_lod, max_lod);
1785       }
1786       break;
1787    case tgsi_sampler_lod_explicit:
1788       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1789          lod[i] = CLAMP(lod_in[i], min_lod, max_lod);
1790       }
1791       break;
1792    default:
1793       assert(0);
1794       lod[0] = lod[1] = lod[2] = lod[3] = 0.0f;
1795    }
1796 }
1797
1798
1799 /* Calculate level of detail for every fragment.
1800  * \param lod_in per-fragment lod_bias or explicit_lod.
1801  * \param lod results per-fragment lod.
1802  */
1803 static INLINE void
1804 compute_lambda_lod(struct sp_sampler_view *sp_sview,
1805                    struct sp_sampler *sp_samp,
1806                    const float s[TGSI_QUAD_SIZE],
1807                    const float t[TGSI_QUAD_SIZE],
1808                    const float p[TGSI_QUAD_SIZE],
1809                    const float lod_in[TGSI_QUAD_SIZE],
1810                    enum tgsi_sampler_control control,
1811                    float lod[TGSI_QUAD_SIZE])
1812 {
1813    const struct pipe_sampler_state *sampler = &sp_samp->base;
1814    float lod_bias = sampler->lod_bias;
1815    float min_lod = sampler->min_lod;
1816    float max_lod = sampler->max_lod;
1817    float lambda;
1818    uint i;
1819
1820    switch (control) {
1821    case tgsi_sampler_lod_none:
1822       /* XXX FIXME */
1823    case tgsi_sampler_derivs_explicit:
1824       lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias;
1825       lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lambda, min_lod, max_lod);
1826       break;
1827    case tgsi_sampler_lod_bias:
1828       lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias;
1829       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1830          lod[i] = lambda + lod_in[i];
1831          lod[i] = CLAMP(lod[i], min_lod, max_lod);
1832       }
1833       break;
1834    case tgsi_sampler_lod_explicit:
1835       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1836          lod[i] = CLAMP(lod_in[i], min_lod, max_lod);
1837       }
1838       break;
1839    case tgsi_sampler_lod_zero:
1840       /* this is all static state in the sampler really need clamp here? */
1841       lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lod_bias, min_lod, max_lod);
1842       break;
1843    default:
1844       assert(0);
1845       lod[0] = lod[1] = lod[2] = lod[3] = 0.0f;
1846    }
1847 }
1848
1849
1850 static void
1851 mip_filter_linear(struct sp_sampler_view *sp_sview,
1852                   struct sp_sampler *sp_samp,
1853                   img_filter_func min_filter,
1854                   img_filter_func mag_filter,
1855                   const float s[TGSI_QUAD_SIZE],
1856                   const float t[TGSI_QUAD_SIZE],
1857                   const float p[TGSI_QUAD_SIZE],
1858                   const float c0[TGSI_QUAD_SIZE],
1859                   const float lod_in[TGSI_QUAD_SIZE],
1860                   enum tgsi_sampler_control control,
1861                   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1862 {
1863    const struct pipe_resource *texture = sp_sview->base.texture;
1864    int j;
1865    float lod[TGSI_QUAD_SIZE];
1866
1867    compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, control, lod);
1868
1869    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1870       int level0 = sp_sview->base.u.tex.first_level + (int)lod[j];
1871
1872       if (lod[j] < 0.0)
1873          mag_filter(sp_sview, sp_samp, s[j], t[j], p[j],
1874                     sp_sview->base.u.tex.first_level,
1875                     sp_sview->faces[j], &rgba[0][j]);
1876
1877       else if (level0 >= (int) texture->last_level)
1878          min_filter(sp_sview, sp_samp, s[j], t[j], p[j], texture->last_level,
1879                     sp_sview->faces[j], &rgba[0][j]);
1880
1881       else {
1882          float levelBlend = frac(lod[j]);
1883          float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
1884          int c;
1885
1886          min_filter(sp_sview, sp_samp, s[j], t[j], p[j], level0,
1887                     sp_sview->faces[j], &rgbax[0][0]);
1888          min_filter(sp_sview, sp_samp, s[j], t[j], p[j], level0+1,
1889                     sp_sview->faces[j], &rgbax[0][1]);
1890
1891          for (c = 0; c < 4; c++) {
1892             rgba[c][j] = lerp(levelBlend, rgbax[c][0], rgbax[c][1]);
1893          }
1894       }
1895    }
1896
1897    if (DEBUG_TEX) {
1898       print_sample_4(__FUNCTION__, rgba);
1899    }
1900 }
1901
1902
1903 /**
1904  * Compute nearest mipmap level from texcoords.
1905  * Then sample the texture level for four elements of a quad.
1906  * \param c0  the LOD bias factors, or absolute LODs (depending on control)
1907  */
1908 static void
1909 mip_filter_nearest(struct sp_sampler_view *sp_sview,
1910                    struct sp_sampler *sp_samp,
1911                    img_filter_func min_filter,
1912                    img_filter_func mag_filter,
1913                    const float s[TGSI_QUAD_SIZE],
1914                    const float t[TGSI_QUAD_SIZE],
1915                    const float p[TGSI_QUAD_SIZE],
1916                    const float c0[TGSI_QUAD_SIZE],
1917                    const float lod_in[TGSI_QUAD_SIZE],
1918                    enum tgsi_sampler_control control,
1919                    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1920 {
1921    const struct pipe_resource *texture = sp_sview->base.texture;
1922    float lod[TGSI_QUAD_SIZE];
1923    int j;
1924
1925    compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, control, lod);
1926
1927    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1928       if (lod[j] < 0.0)
1929          mag_filter(sp_sview, sp_samp, s[j], t[j], p[j],
1930                     sp_sview->base.u.tex.first_level,
1931                     sp_sview->faces[j], &rgba[0][j]);
1932       else {
1933          int level = sp_sview->base.u.tex.first_level + (int)(lod[j] + 0.5F);
1934          level = MIN2(level, (int)texture->last_level);
1935          min_filter(sp_sview, sp_samp, s[j], t[j], p[j],
1936                     level, sp_sview->faces[j], &rgba[0][j]);
1937       }
1938    }
1939
1940    if (DEBUG_TEX) {
1941       print_sample_4(__FUNCTION__, rgba);
1942    }
1943 }
1944
1945
1946 static void
1947 mip_filter_none(struct sp_sampler_view *sp_sview,
1948                 struct sp_sampler *sp_samp,
1949                 img_filter_func min_filter,
1950                 img_filter_func mag_filter,
1951                 const float s[TGSI_QUAD_SIZE],
1952                 const float t[TGSI_QUAD_SIZE],
1953                 const float p[TGSI_QUAD_SIZE],
1954                 const float c0[TGSI_QUAD_SIZE],
1955                 const float lod_in[TGSI_QUAD_SIZE],
1956                 enum tgsi_sampler_control control,
1957                 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1958 {
1959    float lod[TGSI_QUAD_SIZE];
1960    int j;
1961
1962    compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, control, lod);
1963
1964    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1965       if (lod[j] < 0.0) {
1966          mag_filter(sp_sview, sp_samp, s[j], t[j], p[j],
1967                     sp_sview->base.u.tex.first_level,
1968                     sp_sview->faces[j], &rgba[0][j]);
1969       }
1970       else {
1971          min_filter(sp_sview, sp_samp, s[j], t[j], p[j],
1972                     sp_sview->base.u.tex.first_level,
1973                     sp_sview->faces[j], &rgba[0][j]);
1974       }
1975    }
1976 }
1977
1978
1979 static void
1980 mip_filter_none_no_filter_select(struct sp_sampler_view *sp_sview,
1981                                  struct sp_sampler *sp_samp,
1982                                  img_filter_func min_filter,
1983                                  img_filter_func mag_filter,
1984                                  const float s[TGSI_QUAD_SIZE],
1985                                  const float t[TGSI_QUAD_SIZE],
1986                                  const float p[TGSI_QUAD_SIZE],
1987                                  const float c0[TGSI_QUAD_SIZE],
1988                                  const float lod_in[TGSI_QUAD_SIZE],
1989                                  enum tgsi_sampler_control control,
1990                                  float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1991 {
1992    int j;
1993
1994    for (j = 0; j < TGSI_QUAD_SIZE; j++)
1995       mag_filter(sp_sview, sp_samp, s[j], t[j], p[j],
1996                  sp_sview->base.u.tex.first_level,
1997                  sp_sview->faces[j], &rgba[0][j]);
1998 }
1999
2000
/* For anisotropic filtering */
#define WEIGHT_LUT_SIZE 1024

/* Table of EWA filter weights with WEIGHT_LUT_SIZE entries.  It stays NULL
 * until create_filter_table() has successfully built it.
 */
static float *weightLut = NULL;

/**
 * Creates the look-up table used to speed-up EWA sampling.
 *
 * Entry i holds exp(-alpha * r2) with alpha = 2 and r2 = i /
 * (WEIGHT_LUT_SIZE - 1), i.e. r2 runs from 0 to 1 across the table.
 *
 * The table is built only once; later calls return immediately.  If the
 * allocation fails the function returns without touching weightLut, so
 * the pointer is either NULL or refers to a completely filled table.
 */
static void
create_filter_table(void)
{
   float *table;
   unsigned i;

   if (weightLut)
      return;

   table = MALLOC(WEIGHT_LUT_SIZE * sizeof *table);
   if (!table)
      return;   /* out of memory: leave weightLut NULL */

   for (i = 0; i < WEIGHT_LUT_SIZE; ++i) {
      const float alpha = 2;
      const float r2 = (float) i / (float) (WEIGHT_LUT_SIZE - 1);

      table[i] = (float) exp(-alpha * r2);
   }

   /* publish only once every entry has been written */
   weightLut = table;
}
2024
2025
2026 /**
2027  * Elliptical weighted average (EWA) filter for producing high quality
2028  * anisotropic filtered results.
2029  * Based on the Higher Quality Elliptical Weighted Average Filter
2030  * published by Paul S. Heckbert in his Master's Thesis
2031  * "Fundamentals of Texture Mapping and Image Warping" (1989)
2032  */
2033 static void
2034 img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
2035                   struct sp_sampler *sp_samp,
2036                   img_filter_func min_filter,
2037                   img_filter_func mag_filter,
2038                   const float s[TGSI_QUAD_SIZE],
2039                   const float t[TGSI_QUAD_SIZE],
2040                   const float p[TGSI_QUAD_SIZE],
2041                   unsigned level,
2042                   const float dudx, const float dvdx,
2043                   const float dudy, const float dvdy,
2044                   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2045 {
2046    const struct pipe_resource *texture = sp_sview->base.texture;
2047
2048    // ??? Won't the image filters blow up if level is negative?
2049    unsigned level0 = level > 0 ? level : 0;
2050    float scaling = 1.0f / (1 << level0);
2051    int width = u_minify(texture->width0, level0);
2052    int height = u_minify(texture->height0, level0);
2053
2054    float ux = dudx * scaling;
2055    float vx = dvdx * scaling;
2056    float uy = dudy * scaling;
2057    float vy = dvdy * scaling;
2058
2059    /* compute ellipse coefficients to bound the region:
2060     * A*x*x + B*x*y + C*y*y = F.
2061     */
2062    float A = vx*vx+vy*vy+1;
2063    float B = -2*(ux*vx+uy*vy);
2064    float C = ux*ux+uy*uy+1;
2065    float F = A*C-B*B/4.0f;
2066
2067    /* check if it is an ellipse */
2068    /* ASSERT(F > 0.0); */
2069
2070    /* Compute the ellipse's (u,v) bounding box in texture space */
2071    float d = -B*B+4.0f*C*A;
2072    float box_u = 2.0f / d * sqrtf(d*C*F); /* box_u -> half of bbox with   */
2073    float box_v = 2.0f / d * sqrtf(A*d*F); /* box_v -> half of bbox height */
2074
2075    float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2076    float s_buffer[TGSI_QUAD_SIZE];
2077    float t_buffer[TGSI_QUAD_SIZE];
2078    float weight_buffer[TGSI_QUAD_SIZE];
2079    unsigned buffer_next;
2080    int j;
2081    float den; /* = 0.0F; */
2082    float ddq;
2083    float U; /* = u0 - tex_u; */
2084    int v;
2085
2086    /* Scale ellipse formula to directly index the Filter Lookup Table.
2087     * i.e. scale so that F = WEIGHT_LUT_SIZE-1
2088     */
2089    double formScale = (double) (WEIGHT_LUT_SIZE - 1) / F;
2090    A *= formScale;
2091    B *= formScale;
2092    C *= formScale;
2093    /* F *= formScale; */ /* no need to scale F as we don't use it below here */
2094
2095    /* For each quad, the du and dx values are the same and so the ellipse is
2096     * also the same. Note that texel/image access can only be performed using
2097     * a quad, i.e. it is not possible to get the pixel value for a single
2098     * tex coord. In order to have a better performance, the access is buffered
2099     * using the s_buffer/t_buffer and weight_buffer. Only when the buffer is
2100     * full, then the pixel values are read from the image.
2101     */
2102    ddq = 2 * A;
2103
2104    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2105       /* Heckbert MS thesis, p. 59; scan over the bounding box of the ellipse
2106        * and incrementally update the value of Ax^2+Bxy*Cy^2; when this
2107        * value, q, is less than F, we're inside the ellipse
2108        */
2109       float tex_u = -0.5F + s[j] * texture->width0 * scaling;
2110       float tex_v = -0.5F + t[j] * texture->height0 * scaling;
2111
2112       int u0 = (int) floorf(tex_u - box_u);
2113       int u1 = (int) ceilf(tex_u + box_u);
2114       int v0 = (int) floorf(tex_v - box_v);
2115       int v1 = (int) ceilf(tex_v + box_v);
2116
2117       float num[4] = {0.0F, 0.0F, 0.0F, 0.0F};
2118       buffer_next = 0;
2119       den = 0;
2120       U = u0 - tex_u;
2121       for (v = v0; v <= v1; ++v) {
2122          float V = v - tex_v;
2123          float dq = A * (2 * U + 1) + B * V;
2124          float q = (C * V + B * U) * V + A * U * U;
2125
2126          int u;
2127          for (u = u0; u <= u1; ++u) {
2128             /* Note that the ellipse has been pre-scaled so F =
2129              * WEIGHT_LUT_SIZE - 1
2130              */
2131             if (q < WEIGHT_LUT_SIZE) {
2132                /* as a LUT is used, q must never be negative;
2133                 * should not happen, though
2134                 */
2135                const int qClamped = q >= 0.0F ? q : 0;
2136                float weight = weightLut[qClamped];
2137
2138                weight_buffer[buffer_next] = weight;
2139                s_buffer[buffer_next] = u / ((float) width);
2140                t_buffer[buffer_next] = v / ((float) height);
2141
2142                buffer_next++;
2143                if (buffer_next == TGSI_QUAD_SIZE) {
2144                   /* 4 texel coords are in the buffer -> read it now */
2145                   unsigned jj;
2146                   /* it is assumed that samp->min_img_filter is set to
2147                    * img_filter_2d_nearest or one of the
2148                    * accelerated img_filter_2d_nearest_XXX functions.
2149                    */
2150                   for (jj = 0; jj < buffer_next; jj++) {
2151                      min_filter(sp_sview, sp_samp, s_buffer[jj], t_buffer[jj], p[jj],
2152                                 level, sp_sview->faces[j], &rgba_temp[0][jj]);
2153                      num[0] += weight_buffer[jj] * rgba_temp[0][jj];
2154                      num[1] += weight_buffer[jj] * rgba_temp[1][jj];
2155                      num[2] += weight_buffer[jj] * rgba_temp[2][jj];
2156                      num[3] += weight_buffer[jj] * rgba_temp[3][jj];
2157                   }
2158
2159                   buffer_next = 0;
2160                }
2161
2162                den += weight;
2163             }
2164             q += dq;
2165             dq += ddq;
2166          }
2167       }
2168
2169       /* if the tex coord buffer contains unread values, we will read
2170        * them now.
2171        */
2172       if (buffer_next > 0) {
2173          unsigned jj;
2174          /* it is assumed that samp->min_img_filter is set to
2175           * img_filter_2d_nearest or one of the
2176           * accelerated img_filter_2d_nearest_XXX functions.
2177           */
2178          for (jj = 0; jj < buffer_next; jj++) {
2179             min_filter(sp_sview, sp_samp, s_buffer[jj], t_buffer[jj], p[jj],
2180                        level, sp_sview->faces[j], &rgba_temp[0][jj]);
2181             num[0] += weight_buffer[jj] * rgba_temp[0][jj];
2182             num[1] += weight_buffer[jj] * rgba_temp[1][jj];
2183             num[2] += weight_buffer[jj] * rgba_temp[2][jj];
2184             num[3] += weight_buffer[jj] * rgba_temp[3][jj];
2185          }
2186       }
2187
2188       if (den <= 0.0F) {
2189          /* Reaching this place would mean that no pixels intersected
2190           * the ellipse.  This should never happen because the filter
2191           * we use always intersects at least one pixel.
2192           */
2193
2194          /*rgba[0]=0;
2195          rgba[1]=0;
2196          rgba[2]=0;
2197          rgba[3]=0;*/
2198          /* not enough pixels in resampling, resort to direct interpolation */
2199          min_filter(sp_sview, sp_samp, s[j], t[j], p[j], level,
2200                     sp_sview->faces[j], &rgba_temp[0][j]);
2201          den = 1;
2202          num[0] = rgba_temp[0][j];
2203          num[1] = rgba_temp[1][j];
2204          num[2] = rgba_temp[2][j];
2205          num[3] = rgba_temp[3][j];
2206       }
2207
2208       rgba[0][j] = num[0] / den;
2209       rgba[1][j] = num[1] / den;
2210       rgba[2][j] = num[2] / den;
2211       rgba[3][j] = num[3] / den;
2212    }
2213 }
2214
2215
2216 /**
2217  * Sample 2D texture using an anisotropic filter.
2218  */
2219 static void
2220 mip_filter_linear_aniso(struct sp_sampler_view *sp_sview,
2221                         struct sp_sampler *sp_samp,
2222                         img_filter_func min_filter,
2223                         img_filter_func mag_filter,
2224                         const float s[TGSI_QUAD_SIZE],
2225                         const float t[TGSI_QUAD_SIZE],
2226                         const float p[TGSI_QUAD_SIZE],
2227                         const float c0[TGSI_QUAD_SIZE],
2228                         const float lod_in[TGSI_QUAD_SIZE],
2229                         enum tgsi_sampler_control control,
2230                         float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2231 {
2232    const struct pipe_resource *texture = sp_sview->base.texture;
2233    int level0;
2234    float lambda;
2235    float lod[TGSI_QUAD_SIZE];
2236
2237    float s_to_u = u_minify(texture->width0, sp_sview->base.u.tex.first_level);
2238    float t_to_v = u_minify(texture->height0, sp_sview->base.u.tex.first_level);
2239    float dudx = (s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]) * s_to_u;
2240    float dudy = (s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]) * s_to_u;
2241    float dvdx = (t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]) * t_to_v;
2242    float dvdy = (t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]) * t_to_v;
2243
2244    if (control == tgsi_sampler_lod_bias ||
2245        control == tgsi_sampler_lod_none ||
2246        /* XXX FIXME */
2247        control == tgsi_sampler_derivs_explicit) {
2248       /* note: instead of working with Px and Py, we will use the
2249        * squared length instead, to avoid sqrt.
2250        */
2251       float Px2 = dudx * dudx + dvdx * dvdx;
2252       float Py2 = dudy * dudy + dvdy * dvdy;
2253
2254       float Pmax2;
2255       float Pmin2;
2256       float e;
2257       const float maxEccentricity = sp_samp->base.max_anisotropy * sp_samp->base.max_anisotropy;
2258
2259       if (Px2 < Py2) {
2260          Pmax2 = Py2;
2261          Pmin2 = Px2;
2262       }
2263       else {
2264          Pmax2 = Px2;
2265          Pmin2 = Py2;
2266       }
2267
2268       /* if the eccentricity of the ellipse is too big, scale up the shorter
2269        * of the two vectors to limit the maximum amount of work per pixel
2270        */
2271       e = Pmax2 / Pmin2;
2272       if (e > maxEccentricity) {
2273          /* float s=e / maxEccentricity;
2274             minor[0] *= s;
2275             minor[1] *= s;
2276             Pmin2 *= s; */
2277          Pmin2 = Pmax2 / maxEccentricity;
2278       }
2279
2280       /* note: we need to have Pmin=sqrt(Pmin2) here, but we can avoid
2281        * this since 0.5*log(x) = log(sqrt(x))
2282        */
2283       lambda = 0.5F * util_fast_log2(Pmin2) + sp_samp->base.lod_bias;
2284       compute_lod(&sp_samp->base, control, lambda, lod_in, lod);
2285    }
2286    else {
2287       assert(control == tgsi_sampler_lod_explicit ||
2288              control == tgsi_sampler_lod_zero);
2289       compute_lod(&sp_samp->base, control, sp_samp->base.lod_bias, lod_in, lod);
2290    }
2291
2292    /* XXX: Take into account all lod values.
2293     */
2294    lambda = lod[0];
2295    level0 = sp_sview->base.u.tex.first_level + (int)lambda;
2296
2297    /* If the ellipse covers the whole image, we can
2298     * simply return the average of the whole image.
2299     */
2300    if (level0 >= (int) texture->last_level) {
2301       int j;
2302       for (j = 0; j < TGSI_QUAD_SIZE; j++)
2303          min_filter(sp_sview, sp_samp, s[j], t[j], p[j], texture->last_level,
2304                     sp_sview->faces[j], &rgba[0][j]);
2305    }
2306    else {
2307       /* don't bother interpolating between multiple LODs; it doesn't
2308        * seem to be worth the extra running time.
2309        */
2310       img_filter_2d_ewa(sp_sview, sp_samp, min_filter, mag_filter,
2311                         s, t, p, level0,
2312                         dudx, dvdx, dudy, dvdy, rgba);
2313    }
2314
2315    if (DEBUG_TEX) {
2316       print_sample_4(__FUNCTION__, rgba);
2317    }
2318 }
2319
2320
2321 /**
2322  * Specialized version of mip_filter_linear with hard-wired calls to
2323  * 2d lambda calculation and 2d_linear_repeat_POT img filters.
2324  */
2325 static void
2326 mip_filter_linear_2d_linear_repeat_POT(
2327    struct sp_sampler_view *sp_sview,
2328    struct sp_sampler *sp_samp,
2329    img_filter_func min_filter,
2330    img_filter_func mag_filter,
2331    const float s[TGSI_QUAD_SIZE],
2332    const float t[TGSI_QUAD_SIZE],
2333    const float p[TGSI_QUAD_SIZE],
2334    const float c0[TGSI_QUAD_SIZE],
2335    const float lod_in[TGSI_QUAD_SIZE],
2336    enum tgsi_sampler_control control,
2337    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2338 {
2339    const struct pipe_resource *texture = sp_sview->base.texture;
2340    int j;
2341    float lod[TGSI_QUAD_SIZE];
2342
2343    compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, control, lod);
2344
2345    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2346       int level0 = sp_sview->base.u.tex.first_level + (int)lod[j];
2347
2348       /* Catches both negative and large values of level0:
2349        */
2350       if ((unsigned)level0 >= texture->last_level) {
2351          if (level0 < 0)
2352             img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, s[j], t[j], p[j],
2353                                             sp_sview->base.u.tex.first_level,
2354                                             sp_sview->faces[j], &rgba[0][j]);
2355          else
2356             img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, s[j], t[j], p[j],
2357                                             sp_sview->base.texture->last_level,
2358                                             sp_sview->faces[j], &rgba[0][j]);
2359
2360       }
2361       else {
2362          float levelBlend = frac(lod[j]);
2363          float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2364          int c;
2365
2366          img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, s[j], t[j], p[j], level0,
2367                                          sp_sview->faces[j], &rgbax[0][0]);
2368          img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, s[j], t[j], p[j], level0+1,
2369                                          sp_sview->faces[j], &rgbax[0][1]);
2370
2371          for (c = 0; c < TGSI_NUM_CHANNELS; c++)
2372             rgba[c][j] = lerp(levelBlend, rgbax[c][0], rgbax[c][1]);
2373       }
2374    }
2375
2376    if (DEBUG_TEX) {
2377       print_sample_4(__FUNCTION__, rgba);
2378    }
2379 }
2380
2381
2382 /**
2383  * Do shadow/depth comparisons.
2384  */
2385 static void
2386 sample_compare(struct sp_sampler_view *sp_sview,
2387                struct sp_sampler *sp_samp,
2388                const float s[TGSI_QUAD_SIZE],
2389                const float t[TGSI_QUAD_SIZE],
2390                const float p[TGSI_QUAD_SIZE],
2391                const float c0[TGSI_QUAD_SIZE],
2392                const float c1[TGSI_QUAD_SIZE],
2393                enum tgsi_sampler_control control,
2394                float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2395 {
2396    const struct pipe_sampler_state *sampler = &sp_samp->base;
2397    int j, k0, k1, k2, k3;
2398    float val;
2399    float pc0, pc1, pc2, pc3;
2400    const struct util_format_description *format_desc;
2401    unsigned chan_type;
2402
2403    /**
2404     * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
2405     * for 2D Array texture we need to use the 'c0' (aka Q).
2406     * When we sampled the depth texture, the depth value was put into all
2407     * RGBA channels.  We look at the red channel here.
2408     */
2409
2410    if (sp_sview->base.texture->target == PIPE_TEXTURE_2D_ARRAY ||
2411        sp_sview->base.texture->target == PIPE_TEXTURE_CUBE) {
2412       pc0 = c0[0];
2413       pc1 = c0[1];
2414       pc2 = c0[2];
2415       pc3 = c0[3];
2416    } else if (sp_sview->base.texture->target == PIPE_TEXTURE_CUBE_ARRAY) {
2417       pc0 = c1[0];
2418       pc1 = c1[1];
2419       pc2 = c1[2];
2420       pc3 = c1[3];
2421    } else {
2422       pc0 = p[0];
2423       pc1 = p[1];
2424       pc2 = p[2];
2425       pc3 = p[3];
2426    }
2427
2428    format_desc = util_format_description(sp_sview->base.format);
2429    /* not entirely sure we couldn't end up with non-valid swizzle here */
2430    chan_type = format_desc->swizzle[0] <= UTIL_FORMAT_SWIZZLE_W ?
2431                   format_desc->channel[format_desc->swizzle[0]].type :
2432                   UTIL_FORMAT_TYPE_FLOAT;
2433    if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
2434       /*
2435        * clamping is a result of conversion to texture format, hence
2436        * doesn't happen with floats. Technically also should do comparison
2437        * in texture format (quantization!).
2438        */
2439       pc0 = CLAMP(pc0, 0.0F, 1.0F);
2440       pc1 = CLAMP(pc1, 0.0F, 1.0F);
2441       pc2 = CLAMP(pc2, 0.0F, 1.0F);
2442       pc3 = CLAMP(pc3, 0.0F, 1.0F);
2443    }
2444
2445    /* compare four texcoords vs. four texture samples */
2446    switch (sampler->compare_func) {
2447    case PIPE_FUNC_LESS:
2448       k0 = pc0 < rgba[0][0];
2449       k1 = pc1 < rgba[0][1];
2450       k2 = pc2 < rgba[0][2];
2451       k3 = pc3 < rgba[0][3];
2452       break;
2453    case PIPE_FUNC_LEQUAL:
2454       k0 = pc0 <= rgba[0][0];
2455       k1 = pc1 <= rgba[0][1];
2456       k2 = pc2 <= rgba[0][2];
2457       k3 = pc3 <= rgba[0][3];
2458       break;
2459    case PIPE_FUNC_GREATER:
2460       k0 = pc0 > rgba[0][0];
2461       k1 = pc1 > rgba[0][1];
2462       k2 = pc2 > rgba[0][2];
2463       k3 = pc3 > rgba[0][3];
2464       break;
2465    case PIPE_FUNC_GEQUAL:
2466       k0 = pc0 >= rgba[0][0];
2467       k1 = pc1 >= rgba[0][1];
2468       k2 = pc2 >= rgba[0][2];
2469       k3 = pc3 >= rgba[0][3];
2470       break;
2471    case PIPE_FUNC_EQUAL:
2472       k0 = pc0 == rgba[0][0];
2473       k1 = pc1 == rgba[0][1];
2474       k2 = pc2 == rgba[0][2];
2475       k3 = pc3 == rgba[0][3];
2476       break;
2477    case PIPE_FUNC_NOTEQUAL:
2478       k0 = pc0 != rgba[0][0];
2479       k1 = pc1 != rgba[0][1];
2480       k2 = pc2 != rgba[0][2];
2481       k3 = pc3 != rgba[0][3];
2482       break;
2483    case PIPE_FUNC_ALWAYS:
2484       k0 = k1 = k2 = k3 = 1;
2485       break;
2486    case PIPE_FUNC_NEVER:
2487       k0 = k1 = k2 = k3 = 0;
2488       break;
2489    default:
2490       k0 = k1 = k2 = k3 = 0;
2491       assert(0);
2492       break;
2493    }
2494
2495    if (sampler->mag_img_filter == PIPE_TEX_FILTER_LINEAR) {
2496       /* convert four pass/fail values to an intensity in [0,1] */
2497       /*
2498        * XXX this doesn't actually make much sense.
2499        * We just average the result of four _pixels_ and output the same
2500        * value for all of the four pixels of the quad.
2501        * This really needs to work on the _samples_ i.e. inside the img filter.
2502        */
2503       val = 0.25F * (k0 + k1 + k2 + k3);
2504
2505       /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
2506       for (j = 0; j < 4; j++) {
2507          rgba[0][j] = rgba[1][j] = rgba[2][j] = val;
2508          rgba[3][j] = 1.0F;
2509       }
2510    } else {
2511       for (j = 0; j < 4; j++) {
2512          rgba[0][j] = k0;
2513          rgba[1][j] = k1;
2514          rgba[2][j] = k2;
2515          rgba[3][j] = 1.0F;
2516       }
2517    }
2518 }
2519
2520
2521 static void
2522 do_swizzling(const struct pipe_sampler_view *sview,
2523              float in[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
2524              float out[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2525 {
2526    int j;
2527    const unsigned swizzle_r = sview->swizzle_r;
2528    const unsigned swizzle_g = sview->swizzle_g;
2529    const unsigned swizzle_b = sview->swizzle_b;
2530    const unsigned swizzle_a = sview->swizzle_a;
2531
2532    switch (swizzle_r) {
2533    case PIPE_SWIZZLE_ZERO:
2534       for (j = 0; j < 4; j++)
2535          out[0][j] = 0.0f;
2536       break;
2537    case PIPE_SWIZZLE_ONE:
2538       for (j = 0; j < 4; j++)
2539          out[0][j] = 1.0f;
2540       break;
2541    default:
2542       assert(swizzle_r < 4);
2543       for (j = 0; j < 4; j++)
2544          out[0][j] = in[swizzle_r][j];
2545    }
2546
2547    switch (swizzle_g) {
2548    case PIPE_SWIZZLE_ZERO:
2549       for (j = 0; j < 4; j++)
2550          out[1][j] = 0.0f;
2551       break;
2552    case PIPE_SWIZZLE_ONE:
2553       for (j = 0; j < 4; j++)
2554          out[1][j] = 1.0f;
2555       break;
2556    default:
2557       assert(swizzle_g < 4);
2558       for (j = 0; j < 4; j++)
2559          out[1][j] = in[swizzle_g][j];
2560    }
2561
2562    switch (swizzle_b) {
2563    case PIPE_SWIZZLE_ZERO:
2564       for (j = 0; j < 4; j++)
2565          out[2][j] = 0.0f;
2566       break;
2567    case PIPE_SWIZZLE_ONE:
2568       for (j = 0; j < 4; j++)
2569          out[2][j] = 1.0f;
2570       break;
2571    default:
2572       assert(swizzle_b < 4);
2573       for (j = 0; j < 4; j++)
2574          out[2][j] = in[swizzle_b][j];
2575    }
2576
2577    switch (swizzle_a) {
2578    case PIPE_SWIZZLE_ZERO:
2579       for (j = 0; j < 4; j++)
2580          out[3][j] = 0.0f;
2581       break;
2582    case PIPE_SWIZZLE_ONE:
2583       for (j = 0; j < 4; j++)
2584          out[3][j] = 1.0f;
2585       break;
2586    default:
2587       assert(swizzle_a < 4);
2588       for (j = 0; j < 4; j++)
2589          out[3][j] = in[swizzle_a][j];
2590    }
2591 }
2592
2593
2594 static wrap_nearest_func
2595 get_nearest_unorm_wrap(unsigned mode)
2596 {
2597    switch (mode) {
2598    case PIPE_TEX_WRAP_CLAMP:
2599       return wrap_nearest_unorm_clamp;
2600    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2601       return wrap_nearest_unorm_clamp_to_edge;
2602    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2603       return wrap_nearest_unorm_clamp_to_border;
2604    default:
2605       assert(0);
2606       return wrap_nearest_unorm_clamp;
2607    }
2608 }
2609
2610
2611 static wrap_nearest_func
2612 get_nearest_wrap(unsigned mode)
2613 {
2614    switch (mode) {
2615    case PIPE_TEX_WRAP_REPEAT:
2616       return wrap_nearest_repeat;
2617    case PIPE_TEX_WRAP_CLAMP:
2618       return wrap_nearest_clamp;
2619    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2620       return wrap_nearest_clamp_to_edge;
2621    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2622       return wrap_nearest_clamp_to_border;
2623    case PIPE_TEX_WRAP_MIRROR_REPEAT:
2624       return wrap_nearest_mirror_repeat;
2625    case PIPE_TEX_WRAP_MIRROR_CLAMP:
2626       return wrap_nearest_mirror_clamp;
2627    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
2628       return wrap_nearest_mirror_clamp_to_edge;
2629    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
2630       return wrap_nearest_mirror_clamp_to_border;
2631    default:
2632       assert(0);
2633       return wrap_nearest_repeat;
2634    }
2635 }
2636
2637
2638 static wrap_linear_func
2639 get_linear_unorm_wrap(unsigned mode)
2640 {
2641    switch (mode) {
2642    case PIPE_TEX_WRAP_CLAMP:
2643       return wrap_linear_unorm_clamp;
2644    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2645       return wrap_linear_unorm_clamp_to_edge;
2646    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2647       return wrap_linear_unorm_clamp_to_border;
2648    default:
2649       assert(0);
2650       return wrap_linear_unorm_clamp;
2651    }
2652 }
2653
2654
2655 static wrap_linear_func
2656 get_linear_wrap(unsigned mode)
2657 {
2658    switch (mode) {
2659    case PIPE_TEX_WRAP_REPEAT:
2660       return wrap_linear_repeat;
2661    case PIPE_TEX_WRAP_CLAMP:
2662       return wrap_linear_clamp;
2663    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2664       return wrap_linear_clamp_to_edge;
2665    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2666       return wrap_linear_clamp_to_border;
2667    case PIPE_TEX_WRAP_MIRROR_REPEAT:
2668       return wrap_linear_mirror_repeat;
2669    case PIPE_TEX_WRAP_MIRROR_CLAMP:
2670       return wrap_linear_mirror_clamp;
2671    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
2672       return wrap_linear_mirror_clamp_to_edge;
2673    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
2674       return wrap_linear_mirror_clamp_to_border;
2675    default:
2676       assert(0);
2677       return wrap_linear_repeat;
2678    }
2679 }
2680
2681
2682 /**
2683  * Is swizzling needed for the given state key?
2684  */
2685 static INLINE bool
2686 any_swizzle(const struct pipe_sampler_view *view)
2687 {
2688    return (view->swizzle_r != PIPE_SWIZZLE_RED ||
2689            view->swizzle_g != PIPE_SWIZZLE_GREEN ||
2690            view->swizzle_b != PIPE_SWIZZLE_BLUE ||
2691            view->swizzle_a != PIPE_SWIZZLE_ALPHA);
2692 }
2693
2694
2695 static img_filter_func
2696 get_img_filter(const struct sp_sampler_view *sp_sview,
2697                const struct pipe_sampler_state *sampler,
2698                unsigned filter)
2699 {
2700    switch (sp_sview->base.texture->target) {
2701    case PIPE_BUFFER:
2702    case PIPE_TEXTURE_1D:
2703       if (filter == PIPE_TEX_FILTER_NEAREST)
2704          return img_filter_1d_nearest;
2705       else
2706          return img_filter_1d_linear;
2707       break;
2708    case PIPE_TEXTURE_1D_ARRAY:
2709       if (filter == PIPE_TEX_FILTER_NEAREST)
2710          return img_filter_1d_array_nearest;
2711       else
2712          return img_filter_1d_array_linear;
2713       break;
2714    case PIPE_TEXTURE_2D:
2715    case PIPE_TEXTURE_RECT:
2716       /* Try for fast path:
2717        */
2718       if (sp_sview->pot2d &&
2719           sampler->wrap_s == sampler->wrap_t &&
2720           sampler->normalized_coords)
2721       {
2722          switch (sampler->wrap_s) {
2723          case PIPE_TEX_WRAP_REPEAT:
2724             switch (filter) {
2725             case PIPE_TEX_FILTER_NEAREST:
2726                return img_filter_2d_nearest_repeat_POT;
2727             case PIPE_TEX_FILTER_LINEAR:
2728                return img_filter_2d_linear_repeat_POT;
2729             default:
2730                break;
2731             }
2732             break;
2733          case PIPE_TEX_WRAP_CLAMP:
2734             switch (filter) {
2735             case PIPE_TEX_FILTER_NEAREST:
2736                return img_filter_2d_nearest_clamp_POT;
2737             default:
2738                break;
2739             }
2740          }
2741       }
2742       /* Otherwise use default versions:
2743        */
2744       if (filter == PIPE_TEX_FILTER_NEAREST)
2745          return img_filter_2d_nearest;
2746       else
2747          return img_filter_2d_linear;
2748       break;
2749    case PIPE_TEXTURE_2D_ARRAY:
2750       if (filter == PIPE_TEX_FILTER_NEAREST)
2751          return img_filter_2d_array_nearest;
2752       else
2753          return img_filter_2d_array_linear;
2754       break;
2755    case PIPE_TEXTURE_CUBE:
2756       if (filter == PIPE_TEX_FILTER_NEAREST)
2757          return img_filter_cube_nearest;
2758       else
2759          return img_filter_cube_linear;
2760       break;
2761    case PIPE_TEXTURE_CUBE_ARRAY:
2762       if (filter == PIPE_TEX_FILTER_NEAREST)
2763          return img_filter_cube_array_nearest;
2764       else
2765          return img_filter_cube_array_linear;
2766       break;
2767    case PIPE_TEXTURE_3D:
2768       if (filter == PIPE_TEX_FILTER_NEAREST)
2769          return img_filter_3d_nearest;
2770       else
2771          return img_filter_3d_linear;
2772       break;
2773    default:
2774       assert(0);
2775       return img_filter_1d_nearest;
2776    }
2777 }
2778
2779
2780 static void
2781 sample_mip(struct sp_sampler_view *sp_sview,
2782            struct sp_sampler *sp_samp,
2783            const float s[TGSI_QUAD_SIZE],
2784            const float t[TGSI_QUAD_SIZE],
2785            const float p[TGSI_QUAD_SIZE],
2786            const float c0[TGSI_QUAD_SIZE],
2787            const float lod[TGSI_QUAD_SIZE],
2788            enum tgsi_sampler_control control,
2789            float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2790 {
2791    mip_filter_func mip_filter;
2792    img_filter_func min_img_filter = NULL;
2793    img_filter_func mag_img_filter = NULL;
2794
2795    if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) {
2796       mip_filter = mip_filter_linear_2d_linear_repeat_POT;
2797    }
2798    else {
2799       mip_filter = sp_samp->mip_filter;
2800       min_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->min_img_filter);
2801       if (sp_samp->min_mag_equal) {
2802          mag_img_filter = min_img_filter;
2803       }
2804       else {
2805          mag_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->base.mag_img_filter);
2806       }
2807    }
2808
2809    mip_filter(sp_sview, sp_samp, min_img_filter, mag_img_filter,
2810               s, t, p, c0, lod, control, rgba);
2811
2812    if (sp_samp->base.compare_mode != PIPE_TEX_COMPARE_NONE) {
2813       sample_compare(sp_sview, sp_samp, s, t, p, c0, lod, control, rgba);
2814    }
2815
2816    if (sp_sview->need_swizzle) {
2817       float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2818       memcpy(rgba_temp, rgba, sizeof(rgba_temp));
2819       do_swizzling(&sp_sview->base, rgba_temp, rgba);
2820    }
2821
2822 }
2823
2824
2825 /**
2826  * Use 3D texcoords to choose a cube face, then sample the 2D cube faces.
2827  * Put face info into the sampler faces[] array.
2828  */
2829 static void
2830 sample_cube(struct sp_sampler_view *sp_sview,
2831             struct sp_sampler *sp_samp,
2832             const float s[TGSI_QUAD_SIZE],
2833             const float t[TGSI_QUAD_SIZE],
2834             const float p[TGSI_QUAD_SIZE],
2835             const float c0[TGSI_QUAD_SIZE],
2836             const float c1[TGSI_QUAD_SIZE],
2837             enum tgsi_sampler_control control,
2838             float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2839 {
2840    unsigned j;
2841    float ssss[4], tttt[4];
2842
2843    /* Not actually used, but the intermediate steps that do the
2844     * dereferencing don't know it.
2845     */
2846    static float pppp[4] = { 0, 0, 0, 0 };
2847
2848    pppp[0] = c0[0];
2849    pppp[1] = c0[1];
2850    pppp[2] = c0[2];
2851    pppp[3] = c0[3];
2852    /*
2853      major axis
2854      direction    target                             sc     tc    ma
2855      ----------   -------------------------------    ---    ---   ---
2856      +rx          TEXTURE_CUBE_MAP_POSITIVE_X_EXT    -rz    -ry   rx
2857      -rx          TEXTURE_CUBE_MAP_NEGATIVE_X_EXT    +rz    -ry   rx
2858      +ry          TEXTURE_CUBE_MAP_POSITIVE_Y_EXT    +rx    +rz   ry
2859      -ry          TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT    +rx    -rz   ry
2860      +rz          TEXTURE_CUBE_MAP_POSITIVE_Z_EXT    +rx    -ry   rz
2861      -rz          TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT    -rx    -ry   rz
2862    */
2863
2864    /* Choose the cube face and compute new s/t coords for the 2D face.
2865     *
2866     * Use the same cube face for all four pixels in the quad.
2867     *
2868     * This isn't ideal, but if we want to use a different cube face
2869     * per pixel in the quad, we'd have to also compute the per-face
2870     * LOD here too.  That's because the four post-face-selection
2871     * texcoords are no longer related to each other (they're
2872     * per-face!)  so we can't use subtraction to compute the partial
2873     * deriviates to compute the LOD.  Doing so (near cube edges
2874     * anyway) gives us pretty much random values.
2875     */
2876    {
2877       /* use the average of the four pixel's texcoords to choose the face */
2878       const float rx = 0.25F * (s[0] + s[1] + s[2] + s[3]);
2879       const float ry = 0.25F * (t[0] + t[1] + t[2] + t[3]);
2880       const float rz = 0.25F * (p[0] + p[1] + p[2] + p[3]);
2881       const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
2882
2883       if (arx >= ary && arx >= arz) {
2884          float sign = (rx >= 0.0F) ? 1.0F : -1.0F;
2885          uint face = (rx >= 0.0F) ? PIPE_TEX_FACE_POS_X : PIPE_TEX_FACE_NEG_X;
2886          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2887             const float ima = -0.5F / fabsf(s[j]);
2888             ssss[j] = sign *  p[j] * ima + 0.5F;
2889             tttt[j] =         t[j] * ima + 0.5F;
2890             sp_sview->faces[j] = face;
2891          }
2892       }
2893       else if (ary >= arx && ary >= arz) {
2894          float sign = (ry >= 0.0F) ? 1.0F : -1.0F;
2895          uint face = (ry >= 0.0F) ? PIPE_TEX_FACE_POS_Y : PIPE_TEX_FACE_NEG_Y;
2896          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2897             const float ima = -0.5F / fabsf(t[j]);
2898             ssss[j] =        -s[j] * ima + 0.5F;
2899             tttt[j] = sign * -p[j] * ima + 0.5F;
2900             sp_sview->faces[j] = face;
2901          }
2902       }
2903       else {
2904          float sign = (rz >= 0.0F) ? 1.0F : -1.0F;
2905          uint face = (rz >= 0.0F) ? PIPE_TEX_FACE_POS_Z : PIPE_TEX_FACE_NEG_Z;
2906          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2907             const float ima = -0.5F / fabsf(p[j]);
2908             ssss[j] = sign * -s[j] * ima + 0.5F;
2909             tttt[j] =         t[j] * ima + 0.5F;
2910             sp_sview->faces[j] = face;
2911          }
2912       }
2913    }
2914
2915    sample_mip(sp_sview, sp_samp, ssss, tttt, pppp, c0, c1, control, rgba);
2916 }
2917
2918
2919 static void
2920 sp_get_dims(struct sp_sampler_view *sp_sview, int level,
2921             int dims[4])
2922 {
2923    const struct pipe_sampler_view *view = &sp_sview->base;
2924    const struct pipe_resource *texture = view->texture;
2925
2926    /* undefined according to EXT_gpu_program */
2927    level += view->u.tex.first_level;
2928    if (level > view->u.tex.last_level)
2929       return;
2930
2931    dims[0] = u_minify(texture->width0, level);
2932
2933    switch(texture->target) {
2934    case PIPE_TEXTURE_1D_ARRAY:
2935       dims[1] = view->u.tex.last_layer - view->u.tex.first_layer + 1;
2936       /* fallthrough */
2937    case PIPE_TEXTURE_1D:
2938       return;
2939    case PIPE_TEXTURE_2D_ARRAY:
2940       dims[2] = view->u.tex.last_layer - view->u.tex.first_layer + 1;
2941       /* fallthrough */
2942    case PIPE_TEXTURE_2D:
2943    case PIPE_TEXTURE_CUBE:
2944    case PIPE_TEXTURE_RECT:
2945       dims[1] = u_minify(texture->height0, level);
2946       return;
2947    case PIPE_TEXTURE_3D:
2948       dims[1] = u_minify(texture->height0, level);
2949       dims[2] = u_minify(texture->depth0, level);
2950       return;
2951    case PIPE_TEXTURE_CUBE_ARRAY:
2952       dims[1] = u_minify(texture->height0, level);
2953       dims[2] = (view->u.tex.last_layer - view->u.tex.first_layer + 1) / 6;
2954       break;
2955    case PIPE_BUFFER:
2956       dims[0] /= util_format_get_blocksize(view->format);
2957       return;
2958    default:
2959       assert(!"unexpected texture target in sp_get_dims()");
2960       return;
2961    }
2962 }
2963
2964 /**
2965  * This function is only used for getting unfiltered texels via the
2966  * TXF opcode.  The GL spec says that out-of-bounds texel fetches
2967  * produce undefined results.  Instead of crashing, lets just clamp
2968  * coords to the texture image size.
2969  */
2970 static void
2971 sp_get_texels(struct sp_sampler_view *sp_sview,
2972               const int v_i[TGSI_QUAD_SIZE],
2973               const int v_j[TGSI_QUAD_SIZE],
2974               const int v_k[TGSI_QUAD_SIZE],
2975               const int lod[TGSI_QUAD_SIZE],
2976               const int8_t offset[3],
2977               float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2978 {
2979    union tex_tile_address addr;
2980    const struct pipe_resource *texture = sp_sview->base.texture;
2981    int j, c;
2982    const float *tx;
2983    int width, height, depth;
2984
2985    addr.value = 0;
2986    /* TODO write a better test for LOD */
2987    addr.bits.level = lod[0];
2988
2989    width = u_minify(texture->width0, addr.bits.level);
2990    height = u_minify(texture->height0, addr.bits.level);
2991    depth = u_minify(texture->depth0, addr.bits.level);
2992
2993    switch(texture->target) {
2994    case PIPE_BUFFER:
2995    case PIPE_TEXTURE_1D:
2996       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2997          int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
2998          tx = get_texel_2d_no_border(sp_sview, addr, x, 0);
2999          for (c = 0; c < 4; c++) {
3000             rgba[c][j] = tx[c];
3001          }
3002       }
3003       break;
3004    case PIPE_TEXTURE_1D_ARRAY:
3005       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3006          int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
3007          int y = CLAMP(v_j[j], sp_sview->base.u.tex.first_layer, sp_sview->base.u.tex.last_layer);
3008          tx = get_texel_2d_no_border(sp_sview, addr, x, y);
3009          for (c = 0; c < 4; c++) {
3010             rgba[c][j] = tx[c];
3011          }
3012       }
3013       break;
3014    case PIPE_TEXTURE_2D:
3015    case PIPE_TEXTURE_RECT:
3016       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3017          int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
3018          int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
3019          tx = get_texel_2d_no_border(sp_sview, addr, x, y);
3020          for (c = 0; c < 4; c++) {
3021             rgba[c][j] = tx[c];
3022          }
3023       }
3024       break;
3025    case PIPE_TEXTURE_2D_ARRAY:
3026       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3027          int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
3028          int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
3029          int layer = CLAMP(v_k[j], sp_sview->base.u.tex.first_layer, sp_sview->base.u.tex.last_layer);
3030          tx = get_texel_3d_no_border(sp_sview, addr, x, y, layer);
3031          for (c = 0; c < 4; c++) {
3032             rgba[c][j] = tx[c];
3033          }
3034       }
3035       break;
3036    case PIPE_TEXTURE_3D:
3037       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3038          int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
3039          int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
3040          int z = CLAMP(v_k[j] + offset[2], 0, depth - 1);
3041          tx = get_texel_3d_no_border(sp_sview, addr, x, y, z);
3042          for (c = 0; c < 4; c++) {
3043             rgba[c][j] = tx[c];
3044          }
3045       }
3046       break;
3047    case PIPE_TEXTURE_CUBE: /* TXF can't work on CUBE according to spec */
3048    default:
3049       assert(!"Unknown or CUBE texture type in TXF processing\n");
3050       break;
3051    }
3052
3053    if (sp_sview->need_swizzle) {
3054       float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
3055       memcpy(rgba_temp, rgba, sizeof(rgba_temp));
3056       do_swizzling(&sp_sview->base, rgba_temp, rgba);
3057    }
3058 }
3059
3060
3061 void *
3062 softpipe_create_sampler_state(struct pipe_context *pipe,
3063                               const struct pipe_sampler_state *sampler)
3064 {
3065    struct sp_sampler *samp = CALLOC_STRUCT(sp_sampler);
3066
3067    samp->base = *sampler;
3068
3069    /* Note that (for instance) linear_texcoord_s and
3070     * nearest_texcoord_s may be active at the same time, if the
3071     * sampler min_img_filter differs from its mag_img_filter.
3072     */
3073    if (sampler->normalized_coords) {
3074       samp->linear_texcoord_s = get_linear_wrap( sampler->wrap_s );
3075       samp->linear_texcoord_t = get_linear_wrap( sampler->wrap_t );
3076       samp->linear_texcoord_p = get_linear_wrap( sampler->wrap_r );
3077
3078       samp->nearest_texcoord_s = get_nearest_wrap( sampler->wrap_s );
3079       samp->nearest_texcoord_t = get_nearest_wrap( sampler->wrap_t );
3080       samp->nearest_texcoord_p = get_nearest_wrap( sampler->wrap_r );
3081    }
3082    else {
3083       samp->linear_texcoord_s = get_linear_unorm_wrap( sampler->wrap_s );
3084       samp->linear_texcoord_t = get_linear_unorm_wrap( sampler->wrap_t );
3085       samp->linear_texcoord_p = get_linear_unorm_wrap( sampler->wrap_r );
3086
3087       samp->nearest_texcoord_s = get_nearest_unorm_wrap( sampler->wrap_s );
3088       samp->nearest_texcoord_t = get_nearest_unorm_wrap( sampler->wrap_t );
3089       samp->nearest_texcoord_p = get_nearest_unorm_wrap( sampler->wrap_r );
3090    }
3091
3092    samp->min_img_filter = sampler->min_img_filter;
3093
3094    switch (sampler->min_mip_filter) {
3095    case PIPE_TEX_MIPFILTER_NONE:
3096       if (sampler->min_img_filter == sampler->mag_img_filter)
3097          samp->mip_filter = mip_filter_none_no_filter_select;
3098       else
3099          samp->mip_filter = mip_filter_none;
3100       break;
3101
3102    case PIPE_TEX_MIPFILTER_NEAREST:
3103       samp->mip_filter = mip_filter_nearest;
3104       break;
3105
3106    case PIPE_TEX_MIPFILTER_LINEAR:
3107       if (sampler->min_img_filter == sampler->mag_img_filter &&
3108           sampler->normalized_coords &&
3109           sampler->wrap_s == PIPE_TEX_WRAP_REPEAT &&
3110           sampler->wrap_t == PIPE_TEX_WRAP_REPEAT &&
3111           sampler->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
3112           sampler->max_anisotropy <= 1) {
3113          samp->min_mag_equal_repeat_linear = TRUE;
3114       }
3115       samp->mip_filter = mip_filter_linear;
3116
3117       /* Anisotropic filtering extension. */
3118       if (sampler->max_anisotropy > 1) {
3119          samp->mip_filter = mip_filter_linear_aniso;
3120
3121          /* Override min_img_filter:
3122           * min_img_filter needs to be set to NEAREST since we need to access
3123           * each texture pixel as it is and weight it later; using linear
3124           * filters will have incorrect results.
3125           * By setting the filter to NEAREST here, we can avoid calling the
3126           * generic img_filter_2d_nearest in the anisotropic filter function,
3127           * making it possible to use one of the accelerated implementations
3128           */
3129          samp->min_img_filter = PIPE_TEX_FILTER_NEAREST;
3130
3131          /* on first access create the lookup table containing the filter weights. */
3132         if (!weightLut) {
3133            create_filter_table();
3134         }
3135       }
3136       break;
3137    }
3138    if (samp->min_img_filter == sampler->mag_img_filter) {
3139       samp->min_mag_equal = TRUE;
3140    }
3141
3142    return (void *)samp;
3143 }
3144
3145
3146 compute_lambda_func
3147 softpipe_get_lambda_func(const struct pipe_sampler_view *view, unsigned shader)
3148 {
3149    if (shader != PIPE_SHADER_FRAGMENT)
3150       return compute_lambda_vert;
3151
3152    switch (view->texture->target) {
3153    case PIPE_BUFFER:
3154    case PIPE_TEXTURE_1D:
3155    case PIPE_TEXTURE_1D_ARRAY:
3156       return compute_lambda_1d;
3157    case PIPE_TEXTURE_2D:
3158    case PIPE_TEXTURE_2D_ARRAY:
3159    case PIPE_TEXTURE_RECT:
3160    case PIPE_TEXTURE_CUBE:
3161    case PIPE_TEXTURE_CUBE_ARRAY:
3162       return compute_lambda_2d;
3163    case PIPE_TEXTURE_3D:
3164       return compute_lambda_3d;
3165    default:
3166       assert(0);
3167       return compute_lambda_1d;
3168    }
3169 }
3170
3171
3172 struct pipe_sampler_view *
3173 softpipe_create_sampler_view(struct pipe_context *pipe,
3174                              struct pipe_resource *resource,
3175                              const struct pipe_sampler_view *templ)
3176 {
3177    struct sp_sampler_view *sview = CALLOC_STRUCT(sp_sampler_view);
3178    struct softpipe_resource *spr = (struct softpipe_resource *)resource;
3179
3180    if (sview) {
3181       struct pipe_sampler_view *view = &sview->base;
3182       *view = *templ;
3183       view->reference.count = 1;
3184       view->texture = NULL;
3185       pipe_resource_reference(&view->texture, resource);
3186       view->context = pipe;
3187
3188       if (any_swizzle(view)) {
3189          sview->need_swizzle = TRUE;
3190       }
3191
3192       if (resource->target == PIPE_TEXTURE_CUBE ||
3193           resource->target == PIPE_TEXTURE_CUBE_ARRAY)
3194          sview->get_samples = sample_cube;
3195       else {
3196          sview->get_samples = sample_mip;
3197       }
3198       sview->pot2d = spr->pot &&
3199                      (resource->target == PIPE_TEXTURE_2D ||
3200                       resource->target == PIPE_TEXTURE_RECT);
3201
3202       sview->xpot = util_logbase2( resource->width0 );
3203       sview->ypot = util_logbase2( resource->height0 );
3204    }
3205
3206    return (struct pipe_sampler_view *) sview;
3207 }
3208
3209
3210 static void
3211 sp_tgsi_get_dims(struct tgsi_sampler *tgsi_sampler,
3212                  const unsigned sview_index,
3213                  int level, int dims[4])
3214 {
3215    struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
3216
3217    assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
3218    /* always have a view here but texture is NULL if no sampler view was set. */
3219    if (!sp_samp->sp_sview[sview_index].base.texture) {
3220       dims[0] = dims[1] = dims[2] = dims[3] = 0;
3221       return;
3222    }
3223    sp_get_dims(&sp_samp->sp_sview[sview_index], level, dims);
3224 }
3225
3226
3227 static void
3228 sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler,
3229                     const unsigned sview_index,
3230                     const unsigned sampler_index,
3231                     const float s[TGSI_QUAD_SIZE],
3232                     const float t[TGSI_QUAD_SIZE],
3233                     const float p[TGSI_QUAD_SIZE],
3234                     const float c0[TGSI_QUAD_SIZE],
3235                     const float lod[TGSI_QUAD_SIZE],
3236                     float derivs[3][2][TGSI_QUAD_SIZE],
3237                     const int8_t offset[3],
3238                     enum tgsi_sampler_control control,
3239                     float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
3240 {
3241    struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
3242
3243    assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
3244    assert(sampler_index < PIPE_MAX_SAMPLERS);
3245    assert(sp_samp->sp_sampler[sampler_index]);
3246    /* always have a view here but texture is NULL if no sampler view was set. */
3247    if (!sp_samp->sp_sview[sview_index].base.texture) {
3248       int i, j;
3249       for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
3250          for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3251             rgba[j][i] = 0.0f;
3252          }
3253       }
3254       return;
3255    }
3256    sp_samp->sp_sview[sview_index].get_samples(&sp_samp->sp_sview[sview_index],
3257                                               sp_samp->sp_sampler[sampler_index],
3258                                               s, t, p, c0, lod, control, rgba);
3259 }
3260
3261
3262 static void
3263 sp_tgsi_get_texel(struct tgsi_sampler *tgsi_sampler,
3264                   const unsigned sview_index,
3265                   const int i[TGSI_QUAD_SIZE],
3266                   const int j[TGSI_QUAD_SIZE], const int k[TGSI_QUAD_SIZE],
3267                   const int lod[TGSI_QUAD_SIZE], const int8_t offset[3],
3268                   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
3269 {
3270    struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
3271
3272    assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
3273    /* always have a view here but texture is NULL if no sampler view was set. */
3274    if (!sp_samp->sp_sview[sview_index].base.texture) {
3275       int i, j;
3276       for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
3277          for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3278             rgba[j][i] = 0.0f;
3279          }
3280       }
3281       return;
3282    }
3283    sp_get_texels(&sp_samp->sp_sview[sview_index], i, j, k, lod, offset, rgba);
3284 }
3285
3286
3287 struct sp_tgsi_sampler *
3288 sp_create_tgsi_sampler(void)
3289 {
3290    struct sp_tgsi_sampler *samp = CALLOC_STRUCT(sp_tgsi_sampler);
3291    if (!samp)
3292       return NULL;
3293
3294    samp->base.get_dims = sp_tgsi_get_dims;
3295    samp->base.get_samples = sp_tgsi_get_samples;
3296    samp->base.get_texel = sp_tgsi_get_texel;
3297
3298    return samp;
3299 }
3300