src/gallium/drivers/softpipe/sp_tex_sample.c

   1 /**************************************************************************
   2  *
   3  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
   4  * All Rights Reserved.
   5  * Copyright 2008-2010 VMware, Inc.  All rights reserved.
   6  *
   7  * Permission is hereby granted, free of charge, to any person obtaining a
   8  * copy of this software and associated documentation files (the
   9  * "Software"), to deal in the Software without restriction, including
  10  * without limitation the rights to use, copy, modify, merge, publish,
  11  * distribute, sub license, and/or sell copies of the Software, and to
  12  * permit persons to whom the Software is furnished to do so, subject to
  13  * the following conditions:
  14  *
  15  * The above copyright notice and this permission notice (including the
  16  * next paragraph) shall be included in all copies or substantial portions
  17  * of the Software.
  18  *
  19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  22  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26  *
  27  **************************************************************************/
  28
  29 /**
  30  * Texture sampling
  31  *
  32  * Authors:
  33  *   Brian Paul
  34  *   Keith Whitwell
  35  */
  36
  37 #include "pipe/p_context.h"
  38 #include "pipe/p_defines.h"
  39 #include "pipe/p_shader_tokens.h"
  40 #include "util/u_math.h"
  41 #include "util/u_memory.h"
  42 #include "sp_quad.h"   /* only for #define QUAD_* tokens */
  43 #include "sp_tex_sample.h"
  44 #include "sp_tex_tile_cache.h"
  45
  46
  47 /** Set to one to help debug texture sampling */
  48 #define DEBUG_TEX 0
  49
  50
  51 /*
  52  * Return fractional part of 'f'.  Used for computing interpolation weights.
  53  * Need to be careful with negative values.
  54  * Note, if this function isn't perfect you'll sometimes see 1-pixel bands
  55  * of improperly weighted linear-filtered textures.
  56  * The tests/texwrap.c demo is a good test.
  57  */
  58 static INLINE float
  59 frac(float f)
  60 {
  61    return f - floorf(f);
  62 }
  63
  64
  65
  66 /**
  67  * Linear interpolation macro
  68  */
  69 static INLINE float
  70 lerp(float a, float v0, float v1)
  71 {
  72    return v0 + a * (v1 - v0);
  73 }
  74
  75
  76 /**
  77  * Do 2D/bilinear interpolation of float values.
  78  * v00, v10, v01 and v11 are typically four texture samples in a square/box.
  79  * a and b are the horizontal and vertical interpolants.
  80  * It's important that this function is inlined when compiled with
  81  * optimization!  If we find that's not true on some systems, convert
  82  * to a macro.
  83  */
  84 static INLINE float
  85 lerp_2d(float a, float b,
  86         float v00, float v10, float v01, float v11)
  87 {
  88    const float temp0 = lerp(a, v00, v10);
  89    const float temp1 = lerp(a, v01, v11);
  90    return lerp(b, temp0, temp1);
  91 }
  92
  93
  94 /**
  95  * As above, but 3D interpolation of 8 values.
  96  */
  97 static INLINE float
  98 lerp_3d(float a, float b, float c,
  99         float v000, float v100, float v010, float v110,
 100         float v001, float v101, float v011, float v111)
 101 {
 102    const float temp0 = lerp_2d(a, b, v000, v100, v010, v110);
 103    const float temp1 = lerp_2d(a, b, v001, v101, v011, v111);
 104    return lerp(c, temp0, temp1);
 105 }
 106
 107
 108
 109 /**
 110  * Compute coord % size for repeat wrap modes.
 111  * Note that if coord is negative, coord % size doesn't give the right
 112  * value.  To avoid that problem we add a large multiple of the size
 113  * (rather than using a conditional).
 114  */
 115 static INLINE int
 116 repeat(int coord, unsigned size)
 117 {
 118    return (coord + size * 1024) % size;
 119 }
 120
 121
 122 /**
 123  * Apply texture coord wrapping mode and return integer texture indexes
 124  * for a vector of four texcoords (S or T or P).
 125  * \param wrapMode  PIPE_TEX_WRAP_x
 126  * \param s  the incoming texcoords
 127  * \param size  the texture image size
 128  * \param icoord  returns the integer texcoords
 129  * \return  integer texture index
 130  */
 131 static void
 132 wrap_nearest_repeat(const float s[4], unsigned size, int icoord[4])
 133 {
 134    uint ch;
 135    /* s limited to [0,1) */
 136    /* i limited to [0,size-1] */
 137    for (ch = 0; ch < 4; ch++) {
 138       int i = util_ifloor(s[ch] * size);
 139       icoord[ch] = repeat(i, size);
 140    }
 141 }
 142
 143
 144 static void
 145 wrap_nearest_clamp(const float s[4], unsigned size, int icoord[4])
 146 {
 147    uint ch;
 148    /* s limited to [0,1] */
 149    /* i limited to [0,size-1] */
 150    for (ch = 0; ch < 4; ch++) {
 151       if (s[ch] <= 0.0F)
 152          icoord[ch] = 0;
 153       else if (s[ch] >= 1.0F)
 154          icoord[ch] = size - 1;
 155       else
 156          icoord[ch] = util_ifloor(s[ch] * size);
 157    }
 158 }
 159
 160
 161 static void
 162 wrap_nearest_clamp_to_edge(const float s[4], unsigned size, int icoord[4])
 163 {
 164    uint ch;
 165    /* s limited to [min,max] */
 166    /* i limited to [0, size-1] */
 167    const float min = 1.0F / (2.0F * size);
 168    const float max = 1.0F - min;
 169    for (ch = 0; ch < 4; ch++) {
 170       if (s[ch] < min)
 171          icoord[ch] = 0;
 172       else if (s[ch] > max)
 173          icoord[ch] = size - 1;
 174       else
 175          icoord[ch] = util_ifloor(s[ch] * size);
 176    }
 177 }
 178
 179
 180 static void
 181 wrap_nearest_clamp_to_border(const float s[4], unsigned size, int icoord[4])
 182 {
 183    uint ch;
 184    /* s limited to [min,max] */
 185    /* i limited to [-1, size] */
 186    const float min = -1.0F / (2.0F * size);
 187    const float max = 1.0F - min;
 188    for (ch = 0; ch < 4; ch++) {
 189       if (s[ch] <= min)
 190          icoord[ch] = -1;
 191       else if (s[ch] >= max)
 192          icoord[ch] = size;
 193       else
 194          icoord[ch] = util_ifloor(s[ch] * size);
 195    }
 196 }
 197
 198
 199 static void
 200 wrap_nearest_mirror_repeat(const float s[4], unsigned size, int icoord[4])
 201 {
 202    uint ch;
 203    const float min = 1.0F / (2.0F * size);
 204    const float max = 1.0F - min;
 205    for (ch = 0; ch < 4; ch++) {
 206       const int flr = util_ifloor(s[ch]);
 207       float u = frac(s[ch]);
 208       if (flr & 1)
 209          u = 1.0F - u;
 210       if (u < min)
 211          icoord[ch] = 0;
 212       else if (u > max)
 213          icoord[ch] = size - 1;
 214       else
 215          icoord[ch] = util_ifloor(u * size);
 216    }
 217 }
 218
 219
 220 static void
 221 wrap_nearest_mirror_clamp(const float s[4], unsigned size, int icoord[4])
 222 {
 223    uint ch;
 224    for (ch = 0; ch < 4; ch++) {
 225       /* s limited to [0,1] */
 226       /* i limited to [0,size-1] */
 227       const float u = fabsf(s[ch]);
 228       if (u <= 0.0F)
 229          icoord[ch] = 0;
 230       else if (u >= 1.0F)
 231          icoord[ch] = size - 1;
 232       else
 233          icoord[ch] = util_ifloor(u * size);
 234    }
 235 }
 236
 237
 238 static void
 239 wrap_nearest_mirror_clamp_to_edge(const float s[4], unsigned size,
 240                                   int icoord[4])
 241 {
 242    uint ch;
 243    /* s limited to [min,max] */
 244    /* i limited to [0, size-1] */
 245    const float min = 1.0F / (2.0F * size);
 246    const float max = 1.0F - min;
 247    for (ch = 0; ch < 4; ch++) {
 248       const float u = fabsf(s[ch]);
 249       if (u < min)
 250          icoord[ch] = 0;
 251       else if (u > max)
 252          icoord[ch] = size - 1;
 253       else
 254          icoord[ch] = util_ifloor(u * size);
 255    }
 256 }
 257
 258
 259 static void
 260 wrap_nearest_mirror_clamp_to_border(const float s[4], unsigned size,
 261                                     int icoord[4])
 262 {
 263    uint ch;
 264    /* s limited to [min,max] */
 265    /* i limited to [0, size-1] */
 266    const float min = -1.0F / (2.0F * size);
 267    const float max = 1.0F - min;
 268    for (ch = 0; ch < 4; ch++) {
 269       const float u = fabsf(s[ch]);
 270       if (u < min)
 271          icoord[ch] = -1;
 272       else if (u > max)
 273          icoord[ch] = size;
 274       else
 275          icoord[ch] = util_ifloor(u * size);
 276    }
 277 }
 278
 279
 280 /**
 281  * Used to compute texel locations for linear sampling for four texcoords.
 282  * \param wrapMode  PIPE_TEX_WRAP_x
 283  * \param s  the texcoords
 284  * \param size  the texture image size
 285  * \param icoord0  returns first texture indexes
 286  * \param icoord1  returns second texture indexes (usually icoord0 + 1)
 287  * \param w  returns blend factor/weight between texture indexes
 288  * \param icoord  returns the computed integer texture coords
 289  */
 290 static void
 291 wrap_linear_repeat(const float s[4], unsigned size,
 292                    int icoord0[4], int icoord1[4], float w[4])
 293 {
 294    uint ch;
 295    for (ch = 0; ch < 4; ch++) {
 296       float u = s[ch] * size - 0.5F;
 297       icoord0[ch] = repeat(util_ifloor(u), size);
 298       icoord1[ch] = repeat(icoord0[ch] + 1, size);
 299       w[ch] = frac(u);
 300    }
 301 }
 302
 303
 304 static void
 305 wrap_linear_clamp(const float s[4], unsigned size,
 306                   int icoord0[4], int icoord1[4], float w[4])
 307 {
 308    uint ch;
 309    for (ch = 0; ch < 4; ch++) {
 310       float u = CLAMP(s[ch], 0.0F, 1.0F);
 311       u = u * size - 0.5f;
 312       icoord0[ch] = util_ifloor(u);
 313       icoord1[ch] = icoord0[ch] + 1;
 314       w[ch] = frac(u);
 315    }
 316 }
 317
 318
 319 static void
 320 wrap_linear_clamp_to_edge(const float s[4], unsigned size,
 321                           int icoord0[4], int icoord1[4], float w[4])
 322 {
 323    uint ch;
 324    for (ch = 0; ch < 4; ch++) {
 325       float u = CLAMP(s[ch], 0.0F, 1.0F);
 326       u = u * size - 0.5f;
 327       icoord0[ch] = util_ifloor(u);
 328       icoord1[ch] = icoord0[ch] + 1;
 329       if (icoord0[ch] < 0)
 330          icoord0[ch] = 0;
 331       if (icoord1[ch] >= (int) size)
 332          icoord1[ch] = size - 1;
 333       w[ch] = frac(u);
 334    }
 335 }
 336
 337
 338 static void
 339 wrap_linear_clamp_to_border(const float s[4], unsigned size,
 340                             int icoord0[4], int icoord1[4], float w[4])
 341 {
 342    const float min = -1.0F / (2.0F * size);
 343    const float max = 1.0F - min;
 344    uint ch;
 345    for (ch = 0; ch < 4; ch++) {
 346       float u = CLAMP(s[ch], min, max);
 347       u = u * size - 0.5f;
 348       icoord0[ch] = util_ifloor(u);
 349       icoord1[ch] = icoord0[ch] + 1;
 350       w[ch] = frac(u);
 351    }
 352 }
 353
 354
 355 static void
 356 wrap_linear_mirror_repeat(const float s[4], unsigned size,
 357                           int icoord0[4], int icoord1[4], float w[4])
 358 {
 359    uint ch;
 360    for (ch = 0; ch < 4; ch++) {
 361       const int flr = util_ifloor(s[ch]);
 362       float u = frac(s[ch]);
 363       if (flr & 1)
 364          u = 1.0F - u;
 365       u = u * size - 0.5F;
 366       icoord0[ch] = util_ifloor(u);
 367       icoord1[ch] = icoord0[ch] + 1;
 368       if (icoord0[ch] < 0)
 369          icoord0[ch] = 0;
 370       if (icoord1[ch] >= (int) size)
 371          icoord1[ch] = size - 1;
 372       w[ch] = frac(u);
 373    }
 374 }
 375
 376
 377 static void
 378 wrap_linear_mirror_clamp(const float s[4], unsigned size,
 379                          int icoord0[4], int icoord1[4], float w[4])
 380 {
 381    uint ch;
 382    for (ch = 0; ch < 4; ch++) {
 383       float u = fabsf(s[ch]);
 384       if (u >= 1.0F)
 385          u = (float) size;
 386       else
 387          u *= size;
 388       u -= 0.5F;
 389       icoord0[ch] = util_ifloor(u);
 390       icoord1[ch] = icoord0[ch] + 1;
 391       w[ch] = frac(u);
 392    }
 393 }
 394
 395
 396 static void
 397 wrap_linear_mirror_clamp_to_edge(const float s[4], unsigned size,
 398                                  int icoord0[4], int icoord1[4], float w[4])
 399 {
 400    uint ch;
 401    for (ch = 0; ch < 4; ch++) {
 402       float u = fabsf(s[ch]);
 403       if (u >= 1.0F)
 404          u = (float) size;
 405       else
 406          u *= size;
 407       u -= 0.5F;
 408       icoord0[ch] = util_ifloor(u);
 409       icoord1[ch] = icoord0[ch] + 1;
 410       if (icoord0[ch] < 0)
 411          icoord0[ch] = 0;
 412       if (icoord1[ch] >= (int) size)
 413          icoord1[ch] = size - 1;
 414       w[ch] = frac(u);
 415    }
 416 }
 417
 418
 419 static void
 420 wrap_linear_mirror_clamp_to_border(const float s[4], unsigned size,
 421                                    int icoord0[4], int icoord1[4], float w[4])
 422 {
 423    const float min = -1.0F / (2.0F * size);
 424    const float max = 1.0F - min;
 425    uint ch;
 426    for (ch = 0; ch < 4; ch++) {
 427       float u = fabsf(s[ch]);
 428       if (u <= min)
 429          u = min * size;
 430       else if (u >= max)
 431          u = max * size;
 432       else
 433          u *= size;
 434       u -= 0.5F;
 435       icoord0[ch] = util_ifloor(u);
 436       icoord1[ch] = icoord0[ch] + 1;
 437       w[ch] = frac(u);
 438    }
 439 }
 440
 441
 442 /**
 443  * PIPE_TEX_WRAP_CLAMP for nearest sampling, unnormalized coords.
 444  */
 445 static void
 446 wrap_nearest_unorm_clamp(const float s[4], unsigned size, int icoord[4])
 447 {
 448    uint ch;
 449    for (ch = 0; ch < 4; ch++) {
 450       int i = util_ifloor(s[ch]);
 451       icoord[ch]= CLAMP(i, 0, (int) size-1);
 452    }
 453 }
 454
 455
 456 /**
 457  * PIPE_TEX_WRAP_CLAMP_TO_BORDER for nearest sampling, unnormalized coords.
 458  */
 459 static void
 460 wrap_nearest_unorm_clamp_to_border(const float s[4], unsigned size,
 461                                    int icoord[4])
 462 {
 463    uint ch;
 464    for (ch = 0; ch < 4; ch++) {
 465       icoord[ch]= util_ifloor( CLAMP(s[ch], -0.5F, (float) size + 0.5F) );
 466    }
 467 }
 468
 469
 470 /**
 471  * PIPE_TEX_WRAP_CLAMP_TO_EDGE for nearest sampling, unnormalized coords.
 472  */
 473 static void
 474 wrap_nearest_unorm_clamp_to_edge(const float s[4], unsigned size,
 475                                  int icoord[4])
 476 {
 477    uint ch;
 478    for (ch = 0; ch < 4; ch++) {
 479       icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) );
 480    }
 481 }
 482
 483
 484 /**
 485  * PIPE_TEX_WRAP_CLAMP for linear sampling, unnormalized coords.
 486  */
 487 static void
 488 wrap_linear_unorm_clamp(const float s[4], unsigned size,
 489                         int icoord0[4], int icoord1[4], float w[4])
 490 {
 491    uint ch;
 492    for (ch = 0; ch < 4; ch++) {
 493       /* Not exactly what the spec says, but it matches NVIDIA output */
 494       float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f);
 495       icoord0[ch] = util_ifloor(u);
 496       icoord1[ch] = icoord0[ch] + 1;
 497       w[ch] = frac(u);
 498    }
 499 }
 500
 501
 502 /**
 503  * PIPE_TEX_WRAP_CLAMP_TO_BORDER for linear sampling, unnormalized coords.
 504  */
 505 static void
 506 wrap_linear_unorm_clamp_to_border(const float s[4], unsigned size,
 507                                   int icoord0[4], int icoord1[4], float w[4])
 508 {
 509    uint ch;
 510    for (ch = 0; ch < 4; ch++) {
 511       float u = CLAMP(s[ch], -0.5F, (float) size + 0.5F);
 512       u -= 0.5F;
 513       icoord0[ch] = util_ifloor(u);
 514       icoord1[ch] = icoord0[ch] + 1;
 515       if (icoord1[ch] > (int) size - 1)
 516          icoord1[ch] = size - 1;
 517       w[ch] = frac(u);
 518    }
 519 }
 520
 521
 522 /**
 523  * PIPE_TEX_WRAP_CLAMP_TO_EDGE for linear sampling, unnormalized coords.
 524  */
 525 static void
 526 wrap_linear_unorm_clamp_to_edge(const float s[4], unsigned size,
 527                                 int icoord0[4], int icoord1[4], float w[4])
 528 {
 529    uint ch;
 530    for (ch = 0; ch < 4; ch++) {
 531       float u = CLAMP(s[ch], +0.5F, (float) size - 0.5F);
 532       u -= 0.5F;
 533       icoord0[ch] = util_ifloor(u);
 534       icoord1[ch] = icoord0[ch] + 1;
 535       if (icoord1[ch] > (int) size - 1)
 536          icoord1[ch] = size - 1;
 537       w[ch] = frac(u);
 538    }
 539 }
 540
 541
 542 /**
 543  * Do coordinate to array index conversion.  For array textures.
 544  */
 545 static INLINE void
 546 wrap_array_layer(const float coord[4], unsigned size, int layer[4])
 547 {
 548    uint ch;
 549    for (ch = 0; ch < 4; ch++) {
 550       int c = util_ifloor(coord[ch] + 0.5F);
 551       layer[ch] = CLAMP(c, 0, size - 1);
 552    }
 553 }
 554
 555
 556 /**
 557  * Examine the quad's texture coordinates to compute the partial
 558  * derivatives w.r.t X and Y, then compute lambda (level of detail).
 559  */
 560 static float
 561 compute_lambda_1d(const struct sp_sampler_variant *samp,
 562                   const float s[QUAD_SIZE],
 563                   const float t[QUAD_SIZE],
 564                   const float p[QUAD_SIZE])
 565 {
 566    const struct pipe_resource *texture = samp->view->texture;
 567    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
 568    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
 569    float rho = MAX2(dsdx, dsdy) * texture->width0;
 570
 571    return util_fast_log2(rho);
 572 }
 573
 574
 575 static float
 576 compute_lambda_2d(const struct sp_sampler_variant *samp,
 577                   const float s[QUAD_SIZE],
 578                   const float t[QUAD_SIZE],
 579                   const float p[QUAD_SIZE])
 580 {
 581    const struct pipe_resource *texture = samp->view->texture;
 582    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
 583    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
 584    float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
 585    float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
 586    float maxx = MAX2(dsdx, dsdy) * texture->width0;
 587    float maxy = MAX2(dtdx, dtdy) * texture->height0;
 588    float rho  = MAX2(maxx, maxy);
 589
 590    return util_fast_log2(rho);
 591 }
 592
 593
 594 static float
 595 compute_lambda_3d(const struct sp_sampler_variant *samp,
 596                   const float s[QUAD_SIZE],
 597                   const float t[QUAD_SIZE],
 598                   const float p[QUAD_SIZE])
 599 {
 600    const struct pipe_resource *texture = samp->view->texture;
 601    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
 602    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
 603    float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
 604    float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
 605    float dpdx = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]);
 606    float dpdy = fabsf(p[QUAD_TOP_LEFT]     - p[QUAD_BOTTOM_LEFT]);
 607    float maxx = MAX2(dsdx, dsdy) * texture->width0;
 608    float maxy = MAX2(dtdx, dtdy) * texture->height0;
 609    float maxz = MAX2(dpdx, dpdy) * texture->depth0;
 610    float rho;
 611
 612    rho = MAX2(maxx, maxy);
 613    rho = MAX2(rho, maxz);
 614
 615    return util_fast_log2(rho);
 616 }
 617
 618
 619 /**
 620  * Compute lambda for a vertex texture sampler.
 621  * Since there aren't derivatives to use, just return 0.
 622  */
 623 static float
 624 compute_lambda_vert(const struct sp_sampler_variant *samp,
 625                     const float s[QUAD_SIZE],
 626                     const float t[QUAD_SIZE],
 627                     const float p[QUAD_SIZE])
 628 {
 629    return 0.0f;
 630 }
 631
 632
 633
 634 /**
 635  * Get a texel from a texture, using the texture tile cache.
 636  *
 637  * \param addr  the template tex address containing cube, z, face info.
 638  * \param x  the x coord of texel within 2D image
 639  * \param y  the y coord of texel within 2D image
 640  * \param rgba  the quad to put the texel/color into
 641  *
 642  * XXX maybe move this into sp_tex_tile_cache.c and merge with the
 643  * sp_get_cached_tile_tex() function.  Also, get 4 texels instead of 1...
 644  */
 645
 646
 647
 648
 649 static INLINE const float *
 650 get_texel_2d_no_border(const struct sp_sampler_variant *samp,
 651                        union tex_tile_address addr, int x, int y)
 652 {
 653    const struct softpipe_tex_cached_tile *tile;
 654
 655    addr.bits.x = x / TILE_SIZE;
 656    addr.bits.y = y / TILE_SIZE;
 657    y %= TILE_SIZE;
 658    x %= TILE_SIZE;
 659
 660    tile = sp_get_cached_tile_tex(samp->cache, addr);
 661
 662    return &tile->data.color[y][x][0];
 663 }
 664
 665
 666 static INLINE const float *
 667 get_texel_2d(const struct sp_sampler_variant *samp,
 668              union tex_tile_address addr, int x, int y)
 669 {
 670    const struct pipe_resource *texture = samp->view->texture;
 671    unsigned level = addr.bits.level;
 672
 673    if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
 674        y < 0 || y >= (int) u_minify(texture->height0, level)) {
 675       return samp->sampler->border_color;
 676    }
 677    else {
 678       return get_texel_2d_no_border( samp, addr, x, y );
 679    }
 680 }
 681
 682
 683 /* Gather a quad of adjacent texels within a tile:
 684  */
 685 static INLINE void
 686 get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_variant *samp,
 687                                         union tex_tile_address addr,
 688                                         unsigned x, unsigned y,
 689                                         const float *out[4])
 690 {
 691    const struct softpipe_tex_cached_tile *tile;
 692
 693    addr.bits.x = x / TILE_SIZE;
 694    addr.bits.y = y / TILE_SIZE;
 695    y %= TILE_SIZE;
 696    x %= TILE_SIZE;
 697
 698    tile = sp_get_cached_tile_tex(samp->cache, addr);
 699
 700    out[0] = &tile->data.color[y  ][x  ][0];
 701    out[1] = &tile->data.color[y  ][x+1][0];
 702    out[2] = &tile->data.color[y+1][x  ][0];
 703    out[3] = &tile->data.color[y+1][x+1][0];
 704 }
 705
 706
 707 /* Gather a quad of potentially non-adjacent texels:
 708  */
 709 static INLINE void
 710 get_texel_quad_2d_no_border(const struct sp_sampler_variant *samp,
 711                             union tex_tile_address addr,
 712                             int x0, int y0,
 713                             int x1, int y1,
 714                             const float *out[4])
 715 {
 716    out[0] = get_texel_2d_no_border( samp, addr, x0, y0 );
 717    out[1] = get_texel_2d_no_border( samp, addr, x1, y0 );
 718    out[2] = get_texel_2d_no_border( samp, addr, x0, y1 );
 719    out[3] = get_texel_2d_no_border( samp, addr, x1, y1 );
 720 }
 721
 722 /* Can involve a lot of unnecessary checks for border color:
 723  */
 724 static INLINE void
 725 get_texel_quad_2d(const struct sp_sampler_variant *samp,
 726                   union tex_tile_address addr,
 727                   int x0, int y0,
 728                   int x1, int y1,
 729                   const float *out[4])
 730 {
 731    out[0] = get_texel_2d( samp, addr, x0, y0 );
 732    out[1] = get_texel_2d( samp, addr, x1, y0 );
 733    out[3] = get_texel_2d( samp, addr, x1, y1 );
 734    out[2] = get_texel_2d( samp, addr, x0, y1 );
 735 }
 736
 737
 738
 739 /* 3d variants:
 740  */
 741 static INLINE const float *
 742 get_texel_3d_no_border(const struct sp_sampler_variant *samp,
 743                        union tex_tile_address addr, int x, int y, int z)
 744 {
 745    const struct softpipe_tex_cached_tile *tile;
 746
 747    addr.bits.x = x / TILE_SIZE;
 748    addr.bits.y = y / TILE_SIZE;
 749    addr.bits.z = z;
 750    y %= TILE_SIZE;
 751    x %= TILE_SIZE;
 752
 753    tile = sp_get_cached_tile_tex(samp->cache, addr);
 754
 755    return &tile->data.color[y][x][0];
 756 }
 757
 758
 759 static INLINE const float *
 760 get_texel_3d(const struct sp_sampler_variant *samp,
 761              union tex_tile_address addr, int x, int y, int z)
 762 {
 763    const struct pipe_resource *texture = samp->view->texture;
 764    unsigned level = addr.bits.level;
 765
 766    if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
 767        y < 0 || y >= (int) u_minify(texture->height0, level) ||
 768        z < 0 || z >= (int) u_minify(texture->depth0, level)) {
 769       return samp->sampler->border_color;
 770    }
 771    else {
 772       return get_texel_3d_no_border( samp, addr, x, y, z );
 773    }
 774 }
 775
 776
 777 /**
 778  * Given the logbase2 of a mipmap's base level size and a mipmap level,
 779  * return the size (in texels) of that mipmap level.
 780  * For example, if level[0].width = 256 then base_pot will be 8.
 781  * If level = 2, then we'll return 64 (the width at level=2).
 782  * Return 1 if level > base_pot.
 783  */
 784 static INLINE unsigned
 785 pot_level_size(unsigned base_pot, unsigned level)
 786 {
 787    return (base_pot >= level) ? (1 << (base_pot - level)) : 1;
 788 }
 789
 790
 791 static void
 792 print_sample(const char *function, float rgba[NUM_CHANNELS][QUAD_SIZE])
 793 {
 794    debug_printf("%s %g %g %g %g, %g %g %g %g, %g %g %g %g, %g %g %g %g\n",
 795                 function,
 796                 rgba[0][0], rgba[1][0], rgba[2][0], rgba[3][0],
 797                 rgba[0][1], rgba[1][1], rgba[2][1], rgba[3][1],
 798                 rgba[0][2], rgba[1][2], rgba[2][2], rgba[3][2],
 799                 rgba[0][3], rgba[1][3], rgba[2][3], rgba[3][3]);
 800 }
 801
 802
 803 /* Some image-filter fastpaths:
 804  */
 805 static INLINE void
 806 img_filter_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
 807                                 const float s[QUAD_SIZE],
 808                                 const float t[QUAD_SIZE],
 809                                 const float p[QUAD_SIZE],
 810                                 const float c0[QUAD_SIZE],
 811                                 enum tgsi_sampler_control control,
 812                                 float rgba[NUM_CHANNELS][QUAD_SIZE])
 813 {
 814    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
 815    unsigned  j;
 816    unsigned level = samp->level;
 817    unsigned xpot = pot_level_size(samp->xpot, level);
 818    unsigned ypot = pot_level_size(samp->ypot, level);
 819    unsigned xmax = (xpot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, xpot) - 1; */
 820    unsigned ymax = (ypot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, ypot) - 1; */
 821    union tex_tile_address addr;
 822
 823    addr.value = 0;
 824    addr.bits.level = samp->level;
 825
 826    for (j = 0; j < QUAD_SIZE; j++) {
 827       int c;
 828
 829       float u = s[j] * xpot - 0.5F;
 830       float v = t[j] * ypot - 0.5F;
 831
 832       int uflr = util_ifloor(u);
 833       int vflr = util_ifloor(v);
 834
 835       float xw = u - (float)uflr;
 836       float yw = v - (float)vflr;
 837
 838       int x0 = uflr & (xpot - 1);
 839       int y0 = vflr & (ypot - 1);
 840
 841       const float *tx[4];
 842
 843       /* Can we fetch all four at once:
 844        */
 845       if (x0 < xmax && y0 < ymax) {
 846          get_texel_quad_2d_no_border_single_tile(samp, addr, x0, y0, tx);
 847       }
 848       else {
 849          unsigned x1 = (x0 + 1) & (xpot - 1);
 850          unsigned y1 = (y0 + 1) & (ypot - 1);
 851          get_texel_quad_2d_no_border(samp, addr, x0, y0, x1, y1, tx);
 852       }
 853
 854       /* interpolate R, G, B, A */
 855       for (c = 0; c < 4; c++) {
 856          rgba[c][j] = lerp_2d(xw, yw,
 857                               tx[0][c], tx[1][c],
 858                               tx[2][c], tx[3][c]);
 859       }
 860    }
 861
 862    if (DEBUG_TEX) {
 863       print_sample(__FUNCTION__, rgba);
 864    }
 865 }
 866
 867
 868 static INLINE void
 869 img_filter_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler,
 870                                  const float s[QUAD_SIZE],
 871                                  const float t[QUAD_SIZE],
 872                                  const float p[QUAD_SIZE],
 873                                  const float c0[QUAD_SIZE],
 874                                  enum tgsi_sampler_control control,
 875                                  float rgba[NUM_CHANNELS][QUAD_SIZE])
 876 {
 877    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
 878    unsigned  j;
 879    unsigned level = samp->level;
 880    unsigned xpot = pot_level_size(samp->xpot, level);
 881    unsigned ypot = pot_level_size(samp->ypot, level);
 882    union tex_tile_address addr;
 883
 884    addr.value = 0;
 885    addr.bits.level = samp->level;
 886
 887    for (j = 0; j < QUAD_SIZE; j++) {
 888       int c;
 889
 890       float u = s[j] * xpot;
 891       float v = t[j] * ypot;
 892
 893       int uflr = util_ifloor(u);
 894       int vflr = util_ifloor(v);
 895
 896       int x0 = uflr & (xpot - 1);
 897       int y0 = vflr & (ypot - 1);
 898
 899       const float *out = get_texel_2d_no_border(samp, addr, x0, y0);
 900
 901       for (c = 0; c < 4; c++) {
 902          rgba[c][j] = out[c];
 903       }
 904    }
 905
 906    if (DEBUG_TEX) {
 907       print_sample(__FUNCTION__, rgba);
 908    }
 909 }
 910
 911
 912 static INLINE void
 913 img_filter_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler,
 914                                 const float s[QUAD_SIZE],
 915                                 const float t[QUAD_SIZE],
 916                                 const float p[QUAD_SIZE],
 917                                 const float c0[QUAD_SIZE],
 918                                 enum tgsi_sampler_control control,
 919                                 float rgba[NUM_CHANNELS][QUAD_SIZE])
 920 {
 921    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
 922    unsigned  j;
 923    unsigned level = samp->level;
 924    unsigned xpot = pot_level_size(samp->xpot, level);
 925    unsigned ypot = pot_level_size(samp->ypot, level);
 926    union tex_tile_address addr;
 927
 928    addr.value = 0;
 929    addr.bits.level = samp->level;
 930
 931    for (j = 0; j < QUAD_SIZE; j++) {
 932       int c;
 933
 934       float u = s[j] * xpot;
 935       float v = t[j] * ypot;
 936
 937       int x0, y0;
 938       const float *out;
 939
 940       x0 = util_ifloor(u);
 941       if (x0 < 0)
 942          x0 = 0;
 943       else if (x0 > xpot - 1)
 944          x0 = xpot - 1;
 945
 946       y0 = util_ifloor(v);
 947       if (y0 < 0)
 948          y0 = 0;
 949       else if (y0 > ypot - 1)
 950          y0 = ypot - 1;
 951
 952       out = get_texel_2d_no_border(samp, addr, x0, y0);
 953
 954       for (c = 0; c < 4; c++) {
 955          rgba[c][j] = out[c];
 956       }
 957    }
 958
 959    if (DEBUG_TEX) {
 960       print_sample(__FUNCTION__, rgba);
 961    }
 962 }
 963
 964
 965 static void
 966 img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler,
 967                         const float s[QUAD_SIZE],
 968                         const float t[QUAD_SIZE],
 969                         const float p[QUAD_SIZE],
 970                         const float c0[QUAD_SIZE],
 971                         enum tgsi_sampler_control control,
 972                         float rgba[NUM_CHANNELS][QUAD_SIZE])
 973 {
 974    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
 975    const struct pipe_resource *texture = samp->view->texture;
 976    unsigned level0, j;
 977    int width;
 978    int x[4];
 979    union tex_tile_address addr;
 980
 981    level0 = samp->level;
 982    width = u_minify(texture->width0, level0);
 983
 984    assert(width > 0);
 985
 986    addr.value = 0;
 987    addr.bits.level = samp->level;
 988
 989    samp->nearest_texcoord_s(s, width, x);
 990
 991    for (j = 0; j < QUAD_SIZE; j++) {
 992       const float *out = get_texel_2d(samp, addr, x[j], 0);
 993       int c;
 994       for (c = 0; c < 4; c++) {
 995          rgba[c][j] = out[c];
 996       }
 997    }
 998
 999    if (DEBUG_TEX) {
1000       print_sample(__FUNCTION__, rgba);
1001    }
1002 }
1003
1004
1005 static void
1006 img_filter_1d_array_nearest(struct tgsi_sampler *tgsi_sampler,
1007                             const float s[QUAD_SIZE],
1008                             const float t[QUAD_SIZE],
1009                             const float p[QUAD_SIZE],
1010                             const float c0[QUAD_SIZE],
1011                             enum tgsi_sampler_control control,
1012                             float rgba[NUM_CHANNELS][QUAD_SIZE])
1013 {
1014    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1015    const struct pipe_resource *texture = samp->view->texture;
1016    unsigned level0, j;
1017    int width;
1018    int x[4], layer[4];
1019    union tex_tile_address addr;
1020
1021    level0 = samp->level;
1022    width = u_minify(texture->width0, level0);
1023
1024    assert(width > 0);
1025
1026    addr.value = 0;
1027    addr.bits.level = samp->level;
1028
1029    samp->nearest_texcoord_s(s, width, x);
1030    wrap_array_layer(t, texture->height0, layer);
1031
1032    for (j = 0; j < QUAD_SIZE; j++) {
1033       const float *out = get_texel_2d(samp, addr, x[j], layer[j]);
1034       int c;
1035       for (c = 0; c < 4; c++) {
1036          rgba[c][j] = out[c];
1037       }
1038    }
1039
1040    if (DEBUG_TEX) {
1041       print_sample(__FUNCTION__, rgba);
1042    }
1043 }
1044
1045
1046 static void
1047 img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler,
1048                       const float s[QUAD_SIZE],
1049                       const float t[QUAD_SIZE],
1050                       const float p[QUAD_SIZE],
1051                       const float c0[QUAD_SIZE],
1052                       enum tgsi_sampler_control control,
1053                       float rgba[NUM_CHANNELS][QUAD_SIZE])
1054 {
1055    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1056    const struct pipe_resource *texture = samp->view->texture;
1057    unsigned level0, j;
1058    int width, height;
1059    int x[4], y[4];
1060    union tex_tile_address addr;
1061
1062
1063    level0 = samp->level;
1064    width = u_minify(texture->width0, level0);
1065    height = u_minify(texture->height0, level0);
1066
1067    assert(width > 0);
1068    assert(height > 0);
1069
1070    addr.value = 0;
1071    addr.bits.level = samp->level;
1072
1073    samp->nearest_texcoord_s(s, width, x);
1074    samp->nearest_texcoord_t(t, height, y);
1075
1076    for (j = 0; j < QUAD_SIZE; j++) {
1077       const float *out = get_texel_2d(samp, addr, x[j], y[j]);
1078       int c;
1079       for (c = 0; c < 4; c++) {
1080          rgba[c][j] = out[c];
1081       }
1082    }
1083
1084    if (DEBUG_TEX) {
1085       print_sample(__FUNCTION__, rgba);
1086    }
1087 }
1088
1089
1090 static void
1091 img_filter_2d_array_nearest(struct tgsi_sampler *tgsi_sampler,
1092                             const float s[QUAD_SIZE],
1093                             const float t[QUAD_SIZE],
1094                             const float p[QUAD_SIZE],
1095                             const float c0[QUAD_SIZE],
1096                             enum tgsi_sampler_control control,
1097                             float rgba[NUM_CHANNELS][QUAD_SIZE])
1098 {
1099    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1100    const struct pipe_resource *texture = samp->view->texture;
1101    unsigned level0, j;
1102    int width, height;
1103    int x[4], y[4], layer[4];
1104    union tex_tile_address addr;
1105
1106    level0 = samp->level;
1107    width = u_minify(texture->width0, level0);
1108    height = u_minify(texture->height0, level0);
1109
1110    assert(width > 0);
1111    assert(height > 0);
1112
1113    addr.value = 0;
1114    addr.bits.level = samp->level;
1115
1116    samp->nearest_texcoord_s(s, width, x);
1117    samp->nearest_texcoord_t(t, height, y);
1118    wrap_array_layer(p, texture->depth0, layer);
1119
1120    for (j = 0; j < QUAD_SIZE; j++) {
1121       const float *out = get_texel_3d(samp, addr, x[j], y[j], layer[j]);
1122       int c;
1123       for (c = 0; c < 4; c++) {
1124          rgba[c][j] = out[c];
1125       }
1126    }
1127
1128    if (DEBUG_TEX) {
1129       print_sample(__FUNCTION__, rgba);
1130    }
1131 }
1132
1133
1134 static INLINE union tex_tile_address
1135 face(union tex_tile_address addr, unsigned face )
1136 {
1137    addr.bits.face = face;
1138    return addr;
1139 }
1140
1141
1142 static void
1143 img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler,
1144                         const float s[QUAD_SIZE],
1145                         const float t[QUAD_SIZE],
1146                         const float p[QUAD_SIZE],
1147                         const float c0[QUAD_SIZE],
1148                         enum tgsi_sampler_control control,
1149                         float rgba[NUM_CHANNELS][QUAD_SIZE])
1150 {
1151    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1152    const struct pipe_resource *texture = samp->view->texture;
1153    const unsigned *faces = samp->faces; /* zero when not cube-mapping */
1154    unsigned level0, j;
1155    int width, height;
1156    int x[4], y[4];
1157    union tex_tile_address addr;
1158
1159    level0 = samp->level;
1160    width = u_minify(texture->width0, level0);
1161    height = u_minify(texture->height0, level0);
1162
1163    assert(width > 0);
1164    assert(height > 0);
1165
1166    addr.value = 0;
1167    addr.bits.level = samp->level;
1168
1169    samp->nearest_texcoord_s(s, width, x);
1170    samp->nearest_texcoord_t(t, height, y);
1171
1172    for (j = 0; j < QUAD_SIZE; j++) {
1173       const float *out = get_texel_2d(samp, face(addr, faces[j]), x[j], y[j]);
1174       int c;
1175       for (c = 0; c < 4; c++) {
1176          rgba[c][j] = out[c];
1177       }
1178    }
1179
1180    if (DEBUG_TEX) {
1181       print_sample(__FUNCTION__, rgba);
1182    }
1183 }
1184
1185
1186 static void
1187 img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler,
1188                       const float s[QUAD_SIZE],
1189                       const float t[QUAD_SIZE],
1190                       const float p[QUAD_SIZE],
1191                       const float c0[QUAD_SIZE],
1192                       enum tgsi_sampler_control control,
1193                       float rgba[NUM_CHANNELS][QUAD_SIZE])
1194 {
1195    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1196    const struct pipe_resource *texture = samp->view->texture;
1197    unsigned level0, j;
1198    int width, height, depth;
1199    int x[4], y[4], z[4];
1200    union tex_tile_address addr;
1201
1202    level0 = samp->level;
1203    width = u_minify(texture->width0, level0);
1204    height = u_minify(texture->height0, level0);
1205    depth = u_minify(texture->depth0, level0);
1206
1207    assert(width > 0);
1208    assert(height > 0);
1209    assert(depth > 0);
1210
1211    samp->nearest_texcoord_s(s, width,  x);
1212    samp->nearest_texcoord_t(t, height, y);
1213    samp->nearest_texcoord_p(p, depth,  z);
1214
1215    addr.value = 0;
1216    addr.bits.level = samp->level;
1217
1218    for (j = 0; j < QUAD_SIZE; j++) {
1219       const float *out = get_texel_3d(samp, addr, x[j], y[j], z[j]);
1220       int c;
1221       for (c = 0; c < 4; c++) {
1222          rgba[c][j] = out[c];
1223       }
1224    }
1225 }
1226
1227
1228 static void
1229 img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler,
1230                      const float s[QUAD_SIZE],
1231                      const float t[QUAD_SIZE],
1232                      const float p[QUAD_SIZE],
1233                      const float c0[QUAD_SIZE],
1234                      enum tgsi_sampler_control control,
1235                      float rgba[NUM_CHANNELS][QUAD_SIZE])
1236 {
1237    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1238    const struct pipe_resource *texture = samp->view->texture;
1239    unsigned level0, j;
1240    int width;
1241    int x0[4], x1[4];
1242    float xw[4]; /* weights */
1243    union tex_tile_address addr;
1244
1245    level0 = samp->level;
1246    width = u_minify(texture->width0, level0);
1247
1248    assert(width > 0);
1249
1250    addr.value = 0;
1251    addr.bits.level = samp->level;
1252
1253    samp->linear_texcoord_s(s, width, x0, x1, xw);
1254
1255    for (j = 0; j < QUAD_SIZE; j++) {
1256       const float *tx0 = get_texel_2d(samp, addr, x0[j], 0);
1257       const float *tx1 = get_texel_2d(samp, addr, x1[j], 0);
1258       int c;
1259
1260       /* interpolate R, G, B, A */
1261       for (c = 0; c < 4; c++) {
1262          rgba[c][j] = lerp(xw[j], tx0[c], tx1[c]);
1263       }
1264    }
1265 }
1266
1267
1268 static void
1269 img_filter_1d_array_linear(struct tgsi_sampler *tgsi_sampler,
1270                            const float s[QUAD_SIZE],
1271                            const float t[QUAD_SIZE],
1272                            const float p[QUAD_SIZE],
1273                            const float c0[QUAD_SIZE],
1274                            enum tgsi_sampler_control control,
1275                            float rgba[NUM_CHANNELS][QUAD_SIZE])
1276 {
1277    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1278    const struct pipe_resource *texture = samp->view->texture;
1279    unsigned level0, j;
1280    int width;
1281    int x0[4], x1[4], layer[4];
1282    float xw[4]; /* weights */
1283    union tex_tile_address addr;
1284
1285    level0 = samp->level;
1286    width = u_minify(texture->width0, level0);
1287
1288    assert(width > 0);
1289
1290    addr.value = 0;
1291    addr.bits.level = samp->level;
1292
1293    samp->linear_texcoord_s(s, width, x0, x1, xw);
1294    wrap_array_layer(t, texture->height0, layer);
1295
1296    for (j = 0; j < QUAD_SIZE; j++) {
1297       const float *tx0 = get_texel_2d(samp, addr, x0[j], layer[j]);
1298       const float *tx1 = get_texel_2d(samp, addr, x1[j], layer[j]);
1299       int c;
1300
1301       /* interpolate R, G, B, A */
1302       for (c = 0; c < 4; c++) {
1303          rgba[c][j] = lerp(xw[j], tx0[c], tx1[c]);
1304       }
1305    }
1306 }
1307
1308
1309 static void
1310 img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler,
1311                      const float s[QUAD_SIZE],
1312                      const float t[QUAD_SIZE],
1313                      const float p[QUAD_SIZE],
1314                      const float c0[QUAD_SIZE],
1315                      enum tgsi_sampler_control control,
1316                      float rgba[NUM_CHANNELS][QUAD_SIZE])
1317 {
1318    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1319    const struct pipe_resource *texture = samp->view->texture;
1320    unsigned level0, j;
1321    int width, height;
1322    int x0[4], y0[4], x1[4], y1[4];
1323    float xw[4], yw[4]; /* weights */
1324    union tex_tile_address addr;
1325
1326    level0 = samp->level;
1327    width = u_minify(texture->width0, level0);
1328    height = u_minify(texture->height0, level0);
1329
1330    assert(width > 0);
1331    assert(height > 0);
1332
1333    addr.value = 0;
1334    addr.bits.level = samp->level;
1335
1336    samp->linear_texcoord_s(s, width,  x0, x1, xw);
1337    samp->linear_texcoord_t(t, height, y0, y1, yw);
1338
1339    for (j = 0; j < QUAD_SIZE; j++) {
1340       const float *tx0 = get_texel_2d(samp, addr, x0[j], y0[j]);
1341       const float *tx1 = get_texel_2d(samp, addr, x1[j], y0[j]);
1342       const float *tx2 = get_texel_2d(samp, addr, x0[j], y1[j]);
1343       const float *tx3 = get_texel_2d(samp, addr, x1[j], y1[j]);
1344       int c;
1345
1346       /* interpolate R, G, B, A */
1347       for (c = 0; c < 4; c++) {
1348          rgba[c][j] = lerp_2d(xw[j], yw[j],
1349                               tx0[c], tx1[c],
1350                               tx2[c], tx3[c]);
1351       }
1352    }
1353 }
1354
1355
1356 static void
1357 img_filter_2d_array_linear(struct tgsi_sampler *tgsi_sampler,
1358                            const float s[QUAD_SIZE],
1359                            const float t[QUAD_SIZE],
1360                            const float p[QUAD_SIZE],
1361                            const float c0[QUAD_SIZE],
1362                            enum tgsi_sampler_control control,
1363                            float rgba[NUM_CHANNELS][QUAD_SIZE])
1364 {
1365    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1366    const struct pipe_resource *texture = samp->view->texture;
1367    unsigned level0, j;
1368    int width, height;
1369    int x0[4], y0[4], x1[4], y1[4], layer[4];
1370    float xw[4], yw[4]; /* weights */
1371    union tex_tile_address addr;
1372
1373    level0 = samp->level;
1374    width = u_minify(texture->width0, level0);
1375    height = u_minify(texture->height0, level0);
1376
1377    assert(width > 0);
1378    assert(height > 0);
1379
1380    addr.value = 0;
1381    addr.bits.level = samp->level;
1382
1383    samp->linear_texcoord_s(s, width,  x0, x1, xw);
1384    samp->linear_texcoord_t(t, height, y0, y1, yw);
1385    wrap_array_layer(p, texture->depth0, layer);
1386
1387    for (j = 0; j < QUAD_SIZE; j++) {
1388       const float *tx0 = get_texel_3d(samp, addr, x0[j], y0[j], layer[j]);
1389       const float *tx1 = get_texel_3d(samp, addr, x1[j], y0[j], layer[j]);
1390       const float *tx2 = get_texel_3d(samp, addr, x0[j], y1[j], layer[j]);
1391       const float *tx3 = get_texel_3d(samp, addr, x1[j], y1[j], layer[j]);
1392       int c;
1393
1394       /* interpolate R, G, B, A */
1395       for (c = 0; c < 4; c++) {
1396          rgba[c][j] = lerp_2d(xw[j], yw[j],
1397                               tx0[c], tx1[c],
1398                               tx2[c], tx3[c]);
1399       }
1400    }
1401 }
1402
1403
1404 static void
1405 img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler,
1406                        const float s[QUAD_SIZE],
1407                        const float t[QUAD_SIZE],
1408                        const float p[QUAD_SIZE],
1409                        const float c0[QUAD_SIZE],
1410                        enum tgsi_sampler_control control,
1411                        float rgba[NUM_CHANNELS][QUAD_SIZE])
1412 {
1413    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1414    const struct pipe_resource *texture = samp->view->texture;
1415    const unsigned *faces = samp->faces; /* zero when not cube-mapping */
1416    unsigned level0, j;
1417    int width, height;
1418    int x0[4], y0[4], x1[4], y1[4];
1419    float xw[4], yw[4]; /* weights */
1420    union tex_tile_address addr;
1421
1422    level0 = samp->level;
1423    width = u_minify(texture->width0, level0);
1424    height = u_minify(texture->height0, level0);
1425
1426    assert(width > 0);
1427    assert(height > 0);
1428
1429    addr.value = 0;
1430    addr.bits.level = samp->level;
1431
1432    samp->linear_texcoord_s(s, width,  x0, x1, xw);
1433    samp->linear_texcoord_t(t, height, y0, y1, yw);
1434
1435    for (j = 0; j < QUAD_SIZE; j++) {
1436       union tex_tile_address addrj = face(addr, faces[j]);
1437       const float *tx0 = get_texel_2d(samp, addrj, x0[j], y0[j]);
1438       const float *tx1 = get_texel_2d(samp, addrj, x1[j], y0[j]);
1439       const float *tx2 = get_texel_2d(samp, addrj, x0[j], y1[j]);
1440       const float *tx3 = get_texel_2d(samp, addrj, x1[j], y1[j]);
1441       int c;
1442
1443       /* interpolate R, G, B, A */
1444       for (c = 0; c < 4; c++) {
1445          rgba[c][j] = lerp_2d(xw[j], yw[j],
1446                               tx0[c], tx1[c],
1447                               tx2[c], tx3[c]);
1448       }
1449    }
1450 }
1451
1452
1453 static void
1454 img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler,
1455                      const float s[QUAD_SIZE],
1456                      const float t[QUAD_SIZE],
1457                      const float p[QUAD_SIZE],
1458                      const float c0[QUAD_SIZE],
1459                      enum tgsi_sampler_control control,
1460                      float rgba[NUM_CHANNELS][QUAD_SIZE])
1461 {
1462    const struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1463    const struct pipe_resource *texture = samp->view->texture;
1464    unsigned level0, j;
1465    int width, height, depth;
1466    int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4];
1467    float xw[4], yw[4], zw[4]; /* interpolation weights */
1468    union tex_tile_address addr;
1469
1470    level0 = samp->level;
1471    width = u_minify(texture->width0, level0);
1472    height = u_minify(texture->height0, level0);
1473    depth = u_minify(texture->depth0, level0);
1474
1475    addr.value = 0;
1476    addr.bits.level = level0;
1477
1478    assert(width > 0);
1479    assert(height > 0);
1480    assert(depth > 0);
1481
1482    samp->linear_texcoord_s(s, width,  x0, x1, xw);
1483    samp->linear_texcoord_t(t, height, y0, y1, yw);
1484    samp->linear_texcoord_p(p, depth,  z0, z1, zw);
1485
1486    for (j = 0; j < QUAD_SIZE; j++) {
1487       int c;
1488
1489       const float *tx00 = get_texel_3d(samp, addr, x0[j], y0[j], z0[j]);
1490       const float *tx01 = get_texel_3d(samp, addr, x1[j], y0[j], z0[j]);
1491       const float *tx02 = get_texel_3d(samp, addr, x0[j], y1[j], z0[j]);
1492       const float *tx03 = get_texel_3d(samp, addr, x1[j], y1[j], z0[j]);
1493
1494       const float *tx10 = get_texel_3d(samp, addr, x0[j], y0[j], z1[j]);
1495       const float *tx11 = get_texel_3d(samp, addr, x1[j], y0[j], z1[j]);
1496       const float *tx12 = get_texel_3d(samp, addr, x0[j], y1[j], z1[j]);
1497       const float *tx13 = get_texel_3d(samp, addr, x1[j], y1[j], z1[j]);
1498
1499       /* interpolate R, G, B, A */
1500       for (c = 0; c < 4; c++) {
1501          rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j],
1502                               tx00[c], tx01[c],
1503                               tx02[c], tx03[c],
1504                               tx10[c], tx11[c],
1505                               tx12[c], tx13[c]);
1506       }
1507    }
1508 }
1509
1510
1511 /* Calculate level of detail for every fragment.
1512  * Note that lambda has already been biased by global LOD bias.
1513  */
1514 static INLINE void
1515 compute_lod(const struct pipe_sampler_state *sampler,
1516             const float biased_lambda,
1517             const float lodbias[QUAD_SIZE],
1518             float lod[QUAD_SIZE])
1519 {
1520    uint i;
1521
1522    for (i = 0; i < QUAD_SIZE; i++) {
1523       lod[i] = biased_lambda + lodbias[i];
1524       lod[i] = CLAMP(lod[i], sampler->min_lod, sampler->max_lod);
1525    }
1526 }
1527
1528
1529 static void
1530 mip_filter_linear(struct tgsi_sampler *tgsi_sampler,
1531                   const float s[QUAD_SIZE],
1532                   const float t[QUAD_SIZE],
1533                   const float p[QUAD_SIZE],
1534                   const float c0[QUAD_SIZE],
1535                   enum tgsi_sampler_control control,
1536                   float rgba[NUM_CHANNELS][QUAD_SIZE])
1537 {
1538    struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1539    const struct pipe_resource *texture = samp->view->texture;
1540    int level0;
1541    float lambda;
1542    float lod[QUAD_SIZE];
1543
1544    if (control == tgsi_sampler_lod_bias) {
1545       lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
1546       compute_lod(samp->sampler, lambda, c0, lod);
1547    } else {
1548       assert(control == tgsi_sampler_lod_explicit);
1549
1550       memcpy(lod, c0, sizeof(lod));
1551    }
1552
1553    /* XXX: Take into account all lod values.
1554     */
1555    lambda = lod[0];
1556    level0 = (int)lambda;
1557
1558    if (lambda < 0.0) {
1559       samp->level = 0;
1560       samp->mag_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba);
1561    }
1562    else if (level0 >= texture->last_level) {
1563       samp->level = texture->last_level;
1564       samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba);
1565    }
1566    else {
1567       float levelBlend = lambda - level0;
1568       float rgba0[4][4];
1569       float rgba1[4][4];
1570       int c,j;
1571
1572       samp->level = level0;
1573       samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba0);
1574
1575       samp->level = level0+1;
1576       samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba1);
1577
1578       for (j = 0; j < QUAD_SIZE; j++) {
1579          for (c = 0; c < 4; c++) {
1580             rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]);
1581          }
1582       }
1583    }
1584
1585    if (DEBUG_TEX) {
1586       print_sample(__FUNCTION__, rgba);
1587    }
1588 }
1589
1590
1591 /**
1592  * Compute nearest mipmap level from texcoords.
1593  * Then sample the texture level for four elements of a quad.
1594  * \param c0  the LOD bias factors, or absolute LODs (depending on control)
1595  */
1596 static void
1597 mip_filter_nearest(struct tgsi_sampler *tgsi_sampler,
1598                    const float s[QUAD_SIZE],
1599                    const float t[QUAD_SIZE],
1600                    const float p[QUAD_SIZE],
1601                    const float c0[QUAD_SIZE],
1602                    enum tgsi_sampler_control control,
1603                    float rgba[NUM_CHANNELS][QUAD_SIZE])
1604 {
1605    struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1606    const struct pipe_resource *texture = samp->view->texture;
1607    float lambda;
1608    float lod[QUAD_SIZE];
1609
1610    if (control == tgsi_sampler_lod_bias) {
1611       lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
1612       compute_lod(samp->sampler, lambda, c0, lod);
1613    } else {
1614       assert(control == tgsi_sampler_lod_explicit);
1615
1616       memcpy(lod, c0, sizeof(lod));
1617    }
1618
1619    /* XXX: Take into account all lod values.
1620     */
1621    lambda = lod[0];
1622
1623    if (lambda < 0.0) {
1624       samp->level = 0;
1625       samp->mag_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba);
1626    }
1627    else {
1628       samp->level = (int)(lambda + 0.5) ;
1629       samp->level = MIN2(samp->level, (int)texture->last_level);
1630       samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba);
1631    }
1632
1633    if (DEBUG_TEX) {
1634       print_sample(__FUNCTION__, rgba);
1635    }
1636 }
1637
1638
1639 static void
1640 mip_filter_none(struct tgsi_sampler *tgsi_sampler,
1641                 const float s[QUAD_SIZE],
1642                 const float t[QUAD_SIZE],
1643                 const float p[QUAD_SIZE],
1644                 const float c0[QUAD_SIZE],
1645                 enum tgsi_sampler_control control,
1646                 float rgba[NUM_CHANNELS][QUAD_SIZE])
1647 {
1648    struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1649    float lambda;
1650    float lod[QUAD_SIZE];
1651
1652    if (control == tgsi_sampler_lod_bias) {
1653       lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
1654       compute_lod(samp->sampler, lambda, c0, lod);
1655    } else {
1656       assert(control == tgsi_sampler_lod_explicit);
1657
1658       memcpy(lod, c0, sizeof(lod));
1659    }
1660
1661    /* XXX: Take into account all lod values.
1662     */
1663    lambda = lod[0];
1664
1665    if (lambda < 0.0) {
1666       samp->mag_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba);
1667    }
1668    else {
1669       samp->min_img_filter(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba);
1670    }
1671 }
1672
1673
1674
1675 /**
1676  * Specialized version of mip_filter_linear with hard-wired calls to
1677  * 2d lambda calculation and 2d_linear_repeat_POT img filters.
1678  */
1679 static void
1680 mip_filter_linear_2d_linear_repeat_POT(
1681    struct tgsi_sampler *tgsi_sampler,
1682    const float s[QUAD_SIZE],
1683    const float t[QUAD_SIZE],
1684    const float p[QUAD_SIZE],
1685    const float c0[QUAD_SIZE],
1686    enum tgsi_sampler_control control,
1687    float rgba[NUM_CHANNELS][QUAD_SIZE])
1688 {
1689    struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1690    const struct pipe_resource *texture = samp->view->texture;
1691    int level0;
1692    float lambda;
1693    float lod[QUAD_SIZE];
1694
1695    if (control == tgsi_sampler_lod_bias) {
1696       lambda = samp->compute_lambda(samp, s, t, p) + samp->sampler->lod_bias;
1697       compute_lod(samp->sampler, lambda, c0, lod);
1698    } else {
1699       assert(control == tgsi_sampler_lod_explicit);
1700
1701       memcpy(lod, c0, sizeof(lod));
1702    }
1703
1704    /* XXX: Take into account all lod values.
1705     */
1706    lambda = lod[0];
1707    level0 = (int)lambda;
1708
1709    /* Catches both negative and large values of level0:
1710     */
1711    if ((unsigned)level0 >= texture->last_level) {
1712       if (level0 < 0)
1713          samp->level = 0;
1714       else
1715          samp->level = texture->last_level;
1716
1717       img_filter_2d_linear_repeat_POT(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba);
1718    }
1719    else {
1720       float levelBlend = lambda - level0;
1721       float rgba0[4][4];
1722       float rgba1[4][4];
1723       int c,j;
1724
1725       samp->level = level0;
1726       img_filter_2d_linear_repeat_POT(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba0);
1727
1728       samp->level = level0+1;
1729       img_filter_2d_linear_repeat_POT(tgsi_sampler, s, t, p, NULL, tgsi_sampler_lod_bias, rgba1);
1730
1731       for (j = 0; j < QUAD_SIZE; j++) {
1732          for (c = 0; c < 4; c++) {
1733             rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]);
1734          }
1735       }
1736    }
1737
1738    if (DEBUG_TEX) {
1739       print_sample(__FUNCTION__, rgba);
1740    }
1741 }
1742
1743
1744
1745 /**
1746  * Do shadow/depth comparisons.
1747  */
1748 static void
1749 sample_compare(struct tgsi_sampler *tgsi_sampler,
1750                const float s[QUAD_SIZE],
1751                const float t[QUAD_SIZE],
1752                const float p[QUAD_SIZE],
1753                const float c0[QUAD_SIZE],
1754                enum tgsi_sampler_control control,
1755                float rgba[NUM_CHANNELS][QUAD_SIZE])
1756 {
1757    struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1758    const struct pipe_sampler_state *sampler = samp->sampler;
1759    int j, k0, k1, k2, k3;
1760    float val;
1761
1762    samp->mip_filter(tgsi_sampler, s, t, p, c0, control, rgba);
1763
1764    /**
1765     * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
1766     * When we sampled the depth texture, the depth value was put into all
1767     * RGBA channels.  We look at the red channel here.
1768     */
1769
1770    /* compare four texcoords vs. four texture samples */
1771    switch (sampler->compare_func) {
1772    case PIPE_FUNC_LESS:
1773       k0 = p[0] < rgba[0][0];
1774       k1 = p[1] < rgba[0][1];
1775       k2 = p[2] < rgba[0][2];
1776       k3 = p[3] < rgba[0][3];
1777       break;
1778    case PIPE_FUNC_LEQUAL:
1779       k0 = p[0] <= rgba[0][0];
1780       k1 = p[1] <= rgba[0][1];
1781       k2 = p[2] <= rgba[0][2];
1782       k3 = p[3] <= rgba[0][3];
1783       break;
1784    case PIPE_FUNC_GREATER:
1785       k0 = p[0] > rgba[0][0];
1786       k1 = p[1] > rgba[0][1];
1787       k2 = p[2] > rgba[0][2];
1788       k3 = p[3] > rgba[0][3];
1789       break;
1790    case PIPE_FUNC_GEQUAL:
1791       k0 = p[0] >= rgba[0][0];
1792       k1 = p[1] >= rgba[0][1];
1793       k2 = p[2] >= rgba[0][2];
1794       k3 = p[3] >= rgba[0][3];
1795       break;
1796    case PIPE_FUNC_EQUAL:
1797       k0 = p[0] == rgba[0][0];
1798       k1 = p[1] == rgba[0][1];
1799       k2 = p[2] == rgba[0][2];
1800       k3 = p[3] == rgba[0][3];
1801       break;
1802    case PIPE_FUNC_NOTEQUAL:
1803       k0 = p[0] != rgba[0][0];
1804       k1 = p[1] != rgba[0][1];
1805       k2 = p[2] != rgba[0][2];
1806       k3 = p[3] != rgba[0][3];
1807       break;
1808    case PIPE_FUNC_ALWAYS:
1809       k0 = k1 = k2 = k3 = 1;
1810       break;
1811    case PIPE_FUNC_NEVER:
1812       k0 = k1 = k2 = k3 = 0;
1813       break;
1814    default:
1815       k0 = k1 = k2 = k3 = 0;
1816       assert(0);
1817       break;
1818    }
1819
1820    /* convert four pass/fail values to an intensity in [0,1] */
1821    val = 0.25F * (k0 + k1 + k2 + k3);
1822
1823    /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
1824    for (j = 0; j < 4; j++) {
1825       rgba[0][j] = rgba[1][j] = rgba[2][j] = val;
1826       rgba[3][j] = 1.0F;
1827    }
1828 }
1829
1830
1831 /**
1832  * Use 3D texcoords to choose a cube face, then sample the 2D cube faces.
1833  * Put face info into the sampler faces[] array.
1834  */
1835 static void
1836 sample_cube(struct tgsi_sampler *tgsi_sampler,
1837             const float s[QUAD_SIZE],
1838             const float t[QUAD_SIZE],
1839             const float p[QUAD_SIZE],
1840             const float c0[QUAD_SIZE],
1841             enum tgsi_sampler_control control,
1842             float rgba[NUM_CHANNELS][QUAD_SIZE])
1843 {
1844    struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1845    unsigned j;
1846    float ssss[4], tttt[4];
1847
1848    /*
1849      major axis
1850      direction    target                             sc     tc    ma
1851      ----------   -------------------------------    ---    ---   ---
1852      +rx          TEXTURE_CUBE_MAP_POSITIVE_X_EXT    -rz    -ry   rx
1853      -rx          TEXTURE_CUBE_MAP_NEGATIVE_X_EXT    +rz    -ry   rx
1854      +ry          TEXTURE_CUBE_MAP_POSITIVE_Y_EXT    +rx    +rz   ry
1855      -ry          TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT    +rx    -rz   ry
1856      +rz          TEXTURE_CUBE_MAP_POSITIVE_Z_EXT    +rx    -ry   rz
1857      -rz          TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT    -rx    -ry   rz
1858    */
1859
1860    /* Choose the cube face and compute new s/t coords for the 2D face.
1861     *
1862     * Use the same cube face for all four pixels in the quad.
1863     *
1864     * This isn't ideal, but if we want to use a different cube face
1865     * per pixel in the quad, we'd have to also compute the per-face
1866     * LOD here too.  That's because the four post-face-selection
1867     * texcoords are no longer related to each other (they're
1868     * per-face!)  so we can't use subtraction to compute the partial
1869     * deriviates to compute the LOD.  Doing so (near cube edges
1870     * anyway) gives us pretty much random values.
1871     */
1872    {
1873       /* use the average of the four pixel's texcoords to choose the face */
1874       const float rx = 0.25 * (s[0] + s[1] + s[2] + s[3]);
1875       const float ry = 0.25 * (t[0] + t[1] + t[2] + t[3]);
1876       const float rz = 0.25 * (p[0] + p[1] + p[2] + p[3]);
1877       const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
1878
1879       if (arx >= ary && arx >= arz) {
1880          float sign = (rx >= 0.0F) ? 1.0F : -1.0F;
1881          uint face = (rx >= 0.0F) ? PIPE_TEX_FACE_POS_X : PIPE_TEX_FACE_NEG_X;
1882          for (j = 0; j < QUAD_SIZE; j++) {
1883             const float ima = -0.5F / fabsf(s[j]);
1884             ssss[j] = sign *  p[j] * ima + 0.5F;
1885             tttt[j] =         t[j] * ima + 0.5F;
1886             samp->faces[j] = face;
1887          }
1888       }
1889       else if (ary >= arx && ary >= arz) {
1890          float sign = (ry >= 0.0F) ? 1.0F : -1.0F;
1891          uint face = (ry >= 0.0F) ? PIPE_TEX_FACE_POS_Y : PIPE_TEX_FACE_NEG_Y;
1892          for (j = 0; j < QUAD_SIZE; j++) {
1893             const float ima = -0.5F / fabsf(t[j]);
1894             ssss[j] =        -s[j] * ima + 0.5F;
1895             tttt[j] = sign * -p[j] * ima + 0.5F;
1896             samp->faces[j] = face;
1897          }
1898       }
1899       else {
1900          float sign = (rz >= 0.0F) ? 1.0F : -1.0F;
1901          uint face = (rz >= 0.0F) ? PIPE_TEX_FACE_POS_Z : PIPE_TEX_FACE_NEG_Z;
1902          for (j = 0; j < QUAD_SIZE; j++) {
1903             const float ima = -0.5 / fabsf(p[j]);
1904             ssss[j] = sign * -s[j] * ima + 0.5F;
1905             tttt[j] =         t[j] * ima + 0.5F;
1906             samp->faces[j] = face;
1907          }
1908       }
1909    }
1910
1911    /* In our little pipeline, the compare stage is next.  If compare
1912     * is not active, this will point somewhere deeper into the
1913     * pipeline, eg. to mip_filter or even img_filter.
1914     */
1915    samp->compare(tgsi_sampler, ssss, tttt, NULL, c0, control, rgba);
1916 }
1917
1918
1919 static void
1920 sample_swizzle(struct tgsi_sampler *tgsi_sampler,
1921                const float s[QUAD_SIZE],
1922                const float t[QUAD_SIZE],
1923                const float p[QUAD_SIZE],
1924                const float c0[QUAD_SIZE],
1925                enum tgsi_sampler_control control,
1926                float rgba[NUM_CHANNELS][QUAD_SIZE])
1927 {
1928    struct sp_sampler_variant *samp = sp_sampler_variant(tgsi_sampler);
1929    float rgba_temp[NUM_CHANNELS][QUAD_SIZE];
1930    const unsigned swizzle_r = samp->key.bits.swizzle_r;
1931    const unsigned swizzle_g = samp->key.bits.swizzle_g;
1932    const unsigned swizzle_b = samp->key.bits.swizzle_b;
1933    const unsigned swizzle_a = samp->key.bits.swizzle_a;
1934    unsigned j;
1935
1936    samp->sample_target(tgsi_sampler, s, t, p, c0, control, rgba_temp);
1937
1938    switch (swizzle_r) {
1939    case PIPE_SWIZZLE_ZERO:
1940       for (j = 0; j < 4; j++)
1941          rgba[0][j] = 0.0f;
1942       break;
1943    case PIPE_SWIZZLE_ONE:
1944       for (j = 0; j < 4; j++)
1945          rgba[0][j] = 1.0f;
1946       break;
1947    default:
1948       assert(swizzle_r < 4);
1949       for (j = 0; j < 4; j++)
1950          rgba[0][j] = rgba_temp[swizzle_r][j];
1951    }
1952
1953    switch (swizzle_g) {
1954    case PIPE_SWIZZLE_ZERO:
1955       for (j = 0; j < 4; j++)
1956          rgba[1][j] = 0.0f;
1957       break;
1958    case PIPE_SWIZZLE_ONE:
1959       for (j = 0; j < 4; j++)
1960          rgba[1][j] = 1.0f;
1961       break;
1962    default:
1963       assert(swizzle_g < 4);
1964       for (j = 0; j < 4; j++)
1965          rgba[1][j] = rgba_temp[swizzle_g][j];
1966    }
1967
1968    switch (swizzle_b) {
1969    case PIPE_SWIZZLE_ZERO:
1970       for (j = 0; j < 4; j++)
1971          rgba[2][j] = 0.0f;
1972       break;
1973    case PIPE_SWIZZLE_ONE:
1974       for (j = 0; j < 4; j++)
1975          rgba[2][j] = 1.0f;
1976       break;
1977    default:
1978       assert(swizzle_b < 4);
1979       for (j = 0; j < 4; j++)
1980          rgba[2][j] = rgba_temp[swizzle_b][j];
1981    }
1982
1983    switch (swizzle_a) {
1984    case PIPE_SWIZZLE_ZERO:
1985       for (j = 0; j < 4; j++)
1986          rgba[3][j] = 0.0f;
1987       break;
1988    case PIPE_SWIZZLE_ONE:
1989       for (j = 0; j < 4; j++)
1990          rgba[3][j] = 1.0f;
1991       break;
1992    default:
1993       assert(swizzle_a < 4);
1994       for (j = 0; j < 4; j++)
1995          rgba[3][j] = rgba_temp[swizzle_a][j];
1996    }
1997 }
1998
1999
2000 static wrap_nearest_func
2001 get_nearest_unorm_wrap(unsigned mode)
2002 {
2003    switch (mode) {
2004    case PIPE_TEX_WRAP_CLAMP:
2005       return wrap_nearest_unorm_clamp;
2006    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2007       return wrap_nearest_unorm_clamp_to_edge;
2008    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2009       return wrap_nearest_unorm_clamp_to_border;
2010    default:
2011       assert(0);
2012       return wrap_nearest_unorm_clamp;
2013    }
2014 }
2015
2016
2017 static wrap_nearest_func
2018 get_nearest_wrap(unsigned mode)
2019 {
2020    switch (mode) {
2021    case PIPE_TEX_WRAP_REPEAT:
2022       return wrap_nearest_repeat;
2023    case PIPE_TEX_WRAP_CLAMP:
2024       return wrap_nearest_clamp;
2025    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2026       return wrap_nearest_clamp_to_edge;
2027    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2028       return wrap_nearest_clamp_to_border;
2029    case PIPE_TEX_WRAP_MIRROR_REPEAT:
2030       return wrap_nearest_mirror_repeat;
2031    case PIPE_TEX_WRAP_MIRROR_CLAMP:
2032       return wrap_nearest_mirror_clamp;
2033    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
2034       return wrap_nearest_mirror_clamp_to_edge;
2035    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
2036       return wrap_nearest_mirror_clamp_to_border;
2037    default:
2038       assert(0);
2039       return wrap_nearest_repeat;
2040    }
2041 }
2042
2043
2044 static wrap_linear_func
2045 get_linear_unorm_wrap(unsigned mode)
2046 {
2047    switch (mode) {
2048    case PIPE_TEX_WRAP_CLAMP:
2049       return wrap_linear_unorm_clamp;
2050    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2051       return wrap_linear_unorm_clamp_to_edge;
2052    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2053       return wrap_linear_unorm_clamp_to_border;
2054    default:
2055       assert(0);
2056       return wrap_linear_unorm_clamp;
2057    }
2058 }
2059
2060
2061 static wrap_linear_func
2062 get_linear_wrap(unsigned mode)
2063 {
2064    switch (mode) {
2065    case PIPE_TEX_WRAP_REPEAT:
2066       return wrap_linear_repeat;
2067    case PIPE_TEX_WRAP_CLAMP:
2068       return wrap_linear_clamp;
2069    case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2070       return wrap_linear_clamp_to_edge;
2071    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2072       return wrap_linear_clamp_to_border;
2073    case PIPE_TEX_WRAP_MIRROR_REPEAT:
2074       return wrap_linear_mirror_repeat;
2075    case PIPE_TEX_WRAP_MIRROR_CLAMP:
2076       return wrap_linear_mirror_clamp;
2077    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
2078       return wrap_linear_mirror_clamp_to_edge;
2079    case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
2080       return wrap_linear_mirror_clamp_to_border;
2081    default:
2082       assert(0);
2083       return wrap_linear_repeat;
2084    }
2085 }
2086
2087
2088 static compute_lambda_func
2089 get_lambda_func(const union sp_sampler_key key)
2090 {
2091    if (key.bits.processor == TGSI_PROCESSOR_VERTEX)
2092       return compute_lambda_vert;
2093
2094    switch (key.bits.target) {
2095    case PIPE_TEXTURE_1D:
2096    case PIPE_TEXTURE_1D_ARRAY:
2097       return compute_lambda_1d;
2098    case PIPE_TEXTURE_2D:
2099    case PIPE_TEXTURE_2D_ARRAY:
2100    case PIPE_TEXTURE_RECT:
2101    case PIPE_TEXTURE_CUBE:
2102       return compute_lambda_2d;
2103    case PIPE_TEXTURE_3D:
2104       return compute_lambda_3d;
2105    default:
2106       assert(0);
2107       return compute_lambda_1d;
2108    }
2109 }
2110
2111
2112 static filter_func
2113 get_img_filter(const union sp_sampler_key key,
2114                unsigned filter,
2115                const struct pipe_sampler_state *sampler)
2116 {
2117    switch (key.bits.target) {
2118    case PIPE_TEXTURE_1D:
2119       if (filter == PIPE_TEX_FILTER_NEAREST)
2120          return img_filter_1d_nearest;
2121       else
2122          return img_filter_1d_linear;
2123       break;
2124    case PIPE_TEXTURE_1D_ARRAY:
2125       if (filter == PIPE_TEX_FILTER_NEAREST)
2126          return img_filter_1d_array_nearest;
2127       else
2128          return img_filter_1d_array_linear;
2129       break;
2130    case PIPE_TEXTURE_2D:
2131    case PIPE_TEXTURE_RECT:
2132       /* Try for fast path:
2133        */
2134       if (key.bits.is_pot &&
2135           sampler->wrap_s == sampler->wrap_t &&
2136           sampler->normalized_coords)
2137       {
2138          switch (sampler->wrap_s) {
2139          case PIPE_TEX_WRAP_REPEAT:
2140             switch (filter) {
2141             case PIPE_TEX_FILTER_NEAREST:
2142                return img_filter_2d_nearest_repeat_POT;
2143             case PIPE_TEX_FILTER_LINEAR:
2144                return img_filter_2d_linear_repeat_POT;
2145             default:
2146                break;
2147             }
2148             break;
2149          case PIPE_TEX_WRAP_CLAMP:
2150             switch (filter) {
2151             case PIPE_TEX_FILTER_NEAREST:
2152                return img_filter_2d_nearest_clamp_POT;
2153             default:
2154                break;
2155             }
2156          }
2157       }
2158       /* Otherwise use default versions:
2159        */
2160       if (filter == PIPE_TEX_FILTER_NEAREST)
2161          return img_filter_2d_nearest;
2162       else
2163          return img_filter_2d_linear;
2164       break;
2165    case PIPE_TEXTURE_2D_ARRAY:
2166       if (filter == PIPE_TEX_FILTER_NEAREST)
2167          return img_filter_2d_array_nearest;
2168       else
2169          return img_filter_2d_array_linear;
2170       break;
2171    case PIPE_TEXTURE_CUBE:
2172       if (filter == PIPE_TEX_FILTER_NEAREST)
2173          return img_filter_cube_nearest;
2174       else
2175          return img_filter_cube_linear;
2176       break;
2177    case PIPE_TEXTURE_3D:
2178       if (filter == PIPE_TEX_FILTER_NEAREST)
2179          return img_filter_3d_nearest;
2180       else
2181          return img_filter_3d_linear;
2182       break;
2183    default:
2184       assert(0);
2185       return img_filter_1d_nearest;
2186    }
2187 }
2188
2189
2190 /**
2191  * Bind the given texture object and texture cache to the sampler variant.
2192  */
2193 void
2194 sp_sampler_variant_bind_view( struct sp_sampler_variant *samp,
2195                               struct softpipe_tex_tile_cache *tex_cache,
2196                               const struct pipe_sampler_view *view )
2197 {
2198    const struct pipe_sampler_state *sampler = samp->sampler;
2199    const struct pipe_resource *texture = view->texture;
2200
2201    samp->view = view;
2202    samp->cache = tex_cache;
2203    samp->xpot = util_unsigned_logbase2( texture->width0 );
2204    samp->ypot = util_unsigned_logbase2( texture->height0 );
2205    samp->level = CLAMP((int) sampler->min_lod, 0, (int) texture->last_level);
2206 }
2207
2208
/**
 * Release a sampler variant.
 *
 * Only the variant structure itself is passed to FREE(); nothing it
 * points to is released here.
 */
void
sp_sampler_variant_destroy(struct sp_sampler_variant *samp)
{
   FREE(samp);
}
2214
2215
2216 /**
2217  * Create a sampler variant for a given set of non-orthogonal state.
2218  */
2219 struct sp_sampler_variant *
2220 sp_create_sampler_variant( const struct pipe_sampler_state *sampler,
2221                            const union sp_sampler_key key )
2222 {
2223    struct sp_sampler_variant *samp = CALLOC_STRUCT(sp_sampler_variant);
2224    if (!samp)
2225       return NULL;
2226
2227    samp->sampler = sampler;
2228    samp->key = key;
2229
2230    /* Note that (for instance) linear_texcoord_s and
2231     * nearest_texcoord_s may be active at the same time, if the
2232     * sampler min_img_filter differs from its mag_img_filter.
2233     */
2234    if (sampler->normalized_coords) {
2235       samp->linear_texcoord_s = get_linear_wrap( sampler->wrap_s );
2236       samp->linear_texcoord_t = get_linear_wrap( sampler->wrap_t );
2237       samp->linear_texcoord_p = get_linear_wrap( sampler->wrap_r );
2238
2239       samp->nearest_texcoord_s = get_nearest_wrap( sampler->wrap_s );
2240       samp->nearest_texcoord_t = get_nearest_wrap( sampler->wrap_t );
2241       samp->nearest_texcoord_p = get_nearest_wrap( sampler->wrap_r );
2242    }
2243    else {
2244       samp->linear_texcoord_s = get_linear_unorm_wrap( sampler->wrap_s );
2245       samp->linear_texcoord_t = get_linear_unorm_wrap( sampler->wrap_t );
2246       samp->linear_texcoord_p = get_linear_unorm_wrap( sampler->wrap_r );
2247
2248       samp->nearest_texcoord_s = get_nearest_unorm_wrap( sampler->wrap_s );
2249       samp->nearest_texcoord_t = get_nearest_unorm_wrap( sampler->wrap_t );
2250       samp->nearest_texcoord_p = get_nearest_unorm_wrap( sampler->wrap_r );
2251    }
2252
2253    samp->compute_lambda = get_lambda_func( key );
2254
2255    samp->min_img_filter = get_img_filter(key, sampler->min_img_filter, sampler);
2256    samp->mag_img_filter = get_img_filter(key, sampler->mag_img_filter, sampler);
2257
2258    switch (sampler->min_mip_filter) {
2259    case PIPE_TEX_MIPFILTER_NONE:
2260       if (sampler->min_img_filter == sampler->mag_img_filter)
2261          samp->mip_filter = samp->min_img_filter;
2262       else
2263          samp->mip_filter = mip_filter_none;
2264       break;
2265
2266    case PIPE_TEX_MIPFILTER_NEAREST:
2267       samp->mip_filter = mip_filter_nearest;
2268       break;
2269
2270    case PIPE_TEX_MIPFILTER_LINEAR:
2271       if (key.bits.is_pot &&
2272           sampler->min_img_filter == sampler->mag_img_filter &&
2273           sampler->normalized_coords &&
2274           sampler->wrap_s == PIPE_TEX_WRAP_REPEAT &&
2275           sampler->wrap_t == PIPE_TEX_WRAP_REPEAT &&
2276           sampler->min_img_filter == PIPE_TEX_FILTER_LINEAR)
2277       {
2278          samp->mip_filter = mip_filter_linear_2d_linear_repeat_POT;
2279       }
2280       else
2281       {
2282          samp->mip_filter = mip_filter_linear;
2283       }
2284       break;
2285    }
2286
2287    if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
2288       samp->compare = sample_compare;
2289    }
2290    else {
2291       /* Skip compare operation by promoting the mip_filter function
2292        * pointer:
2293        */
2294       samp->compare = samp->mip_filter;
2295    }
2296
2297    if (key.bits.target == PIPE_TEXTURE_CUBE) {
2298       samp->sample_target = sample_cube;
2299    }
2300    else {
2301       samp->faces[0] = 0;
2302       samp->faces[1] = 0;
2303       samp->faces[2] = 0;
2304       samp->faces[3] = 0;
2305
2306       /* Skip cube face determination by promoting the compare
2307        * function pointer:
2308        */
2309       samp->sample_target = samp->compare;
2310    }
2311
2312    if (key.bits.swizzle_r != PIPE_SWIZZLE_RED ||
2313        key.bits.swizzle_g != PIPE_SWIZZLE_GREEN ||
2314        key.bits.swizzle_b != PIPE_SWIZZLE_BLUE ||
2315        key.bits.swizzle_a != PIPE_SWIZZLE_ALPHA) {
2316       samp->base.get_samples = sample_swizzle;
2317    }
2318    else {
2319       samp->base.get_samples = samp->sample_target;
2320    }
2321
2322    return samp;
2323 }