tgsi: add texel offsets and derivatives to sampler interface
[mesa.git] / src / gallium / drivers / softpipe / sp_tex_sample.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 * Copyright 2008-2010 VMware, Inc. All rights reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * Texture sampling
31 *
32 * Authors:
33 * Brian Paul
34 * Keith Whitwell
35 */
36
37 #include "pipe/p_context.h"
38 #include "pipe/p_defines.h"
39 #include "pipe/p_shader_tokens.h"
40 #include "util/u_math.h"
41 #include "util/u_format.h"
42 #include "util/u_memory.h"
43 #include "sp_quad.h" /* only for #define QUAD_* tokens */
44 #include "sp_tex_sample.h"
45 #include "sp_tex_tile_cache.h"
46
47
48 /** Set to one to help debug texture sampling */
49 #define DEBUG_TEX 0
50
51
52 /*
53 * Return fractional part of 'f'. Used for computing interpolation weights.
54 * Need to be careful with negative values.
55 * Note, if this function isn't perfect you'll sometimes see 1-pixel bands
56 * of improperly weighted linear-filtered textures.
57 * The tests/texwrap.c demo is a good test.
58 */
59 static INLINE float
60 frac(float f)
61 {
62 return f - floorf(f);
63 }
64
65
66
/**
 * Linear interpolation between two values.
 * \param a   the interpolant; 0 selects v0, larger values move towards v1
 * \param v0  value at a = 0
 * \param v1  value approached at a = 1
 */
static INLINE float
lerp(float a, float v0, float v1)
{
   const float delta = v1 - v0;
   return v0 + a * delta;
}
75
76
77 /**
78 * Do 2D/bilinear interpolation of float values.
79 * v00, v10, v01 and v11 are typically four texture samples in a square/box.
80 * a and b are the horizontal and vertical interpolants.
81 * It's important that this function is inlined when compiled with
82 * optimization! If we find that's not true on some systems, convert
83 * to a macro.
84 */
85 static INLINE float
86 lerp_2d(float a, float b,
87 float v00, float v10, float v01, float v11)
88 {
89 const float temp0 = lerp(a, v00, v10);
90 const float temp1 = lerp(a, v01, v11);
91 return lerp(b, temp0, temp1);
92 }
93
94
95 /**
96 * As above, but 3D interpolation of 8 values.
97 */
98 static INLINE float
99 lerp_3d(float a, float b, float c,
100 float v000, float v100, float v010, float v110,
101 float v001, float v101, float v011, float v111)
102 {
103 const float temp0 = lerp_2d(a, b, v000, v100, v010, v110);
104 const float temp1 = lerp_2d(a, b, v001, v101, v011, v111);
105 return lerp(c, temp0, temp1);
106 }
107
108
109
/**
 * Compute coord modulo size for repeat wrap modes, always returning a
 * value in [0, size-1].
 *
 * C's '%' truncates towards zero, so a negative coord yields a negative
 * (or zero) remainder; that case is folded back into range by adding
 * size once.  Unlike biasing coord by a fixed multiple of size in
 * unsigned arithmetic, this is correct for any negative coord and for
 * non-power-of-two sizes.
 *
 * \param coord  integer texel coordinate, may be negative
 * \param size   texture dimension in texels; must be non-zero
 */
static INLINE int
repeat(int coord, unsigned size)
{
   const int isize = (int) size;
   int rem = coord % isize;

   if (rem < 0)
      rem += isize;
   return rem;
}
121
122
/**
 * Nearest-filter wrap function for the repeat mode: convert one
 * texcoord (S or T or P) to an integer texel index.
 * \param s       the incoming texcoord
 * \param size    the texture image size
 * \param icoord  returns the integer texel index, in [0, size-1]
 */
static void
wrap_nearest_repeat(float s, unsigned size, int *icoord)
{
   /* scale to texel space and floor, then wrap into the image */
   *icoord = repeat(util_ifloor(s * size), size);
}
139
140
/**
 * Nearest-filter wrap function for the clamp mode.
 * s is limited to [0,1]; the returned index is limited to [0,size-1].
 */
static void
wrap_nearest_clamp(float s, unsigned size, int *icoord)
{
   if (s <= 0.0F) {
      *icoord = 0;
      return;
   }
   if (s >= 1.0F) {
      *icoord = size - 1;
      return;
   }
   *icoord = util_ifloor(s * size);
}
153
154
/**
 * Nearest-filter wrap function for the clamp-to-edge mode.
 * s is limited to [half a texel, 1 - half a texel]; the returned index
 * is limited to [0, size-1].
 */
static void
wrap_nearest_clamp_to_edge(float s, unsigned size, int *icoord)
{
   const float half_texel = 1.0F / (2.0F * size);
   const float upper = 1.0F - half_texel;

   if (s < half_texel) {
      *icoord = 0;
   }
   else if (s > upper) {
      *icoord = size - 1;
   }
   else {
      *icoord = util_ifloor(s * size);
   }
}
169
170
/**
 * Nearest-filter wrap function for the clamp-to-border mode.
 * s is limited to half a texel outside [0,1] on either side; the
 * returned index is limited to [-1, size], where -1 and size denote
 * border texels.
 */
static void
wrap_nearest_clamp_to_border(float s, unsigned size, int *icoord)
{
   const float lower = -1.0F / (2.0F * size);
   const float upper = 1.0F - lower;

   if (s <= lower) {
      *icoord = -1;
      return;
   }
   if (s >= upper) {
      *icoord = size;
      return;
   }
   *icoord = util_ifloor(s * size);
}
185
186
/**
 * Nearest-filter wrap function for the mirror-repeat mode.
 * The fractional part of s is used directly when floor(s) is even and
 * flipped (1 - frac) when it is odd; the result is then clamped to the
 * edge texels.  The returned index is limited to [0, size-1].
 */
static void
wrap_nearest_mirror_repeat(float s, unsigned size, int *icoord)
{
   const float half_texel = 1.0F / (2.0F * size);
   const float upper = 1.0F - half_texel;
   float u = frac(s);

   /* odd repetitions run backwards */
   if (util_ifloor(s) & 1)
      u = 1.0F - u;

   if (u < half_texel) {
      *icoord = 0;
   }
   else if (u > upper) {
      *icoord = size - 1;
   }
   else {
      *icoord = util_ifloor(u * size);
   }
}
203
204
/**
 * Nearest-filter wrap function for the mirror-clamp mode.
 * |s| is limited to [0,1]; the returned index is limited to [0,size-1].
 */
static void
wrap_nearest_mirror_clamp(float s, unsigned size, int *icoord)
{
   const float mirrored = fabsf(s);

   if (mirrored <= 0.0F) {
      *icoord = 0;
      return;
   }
   if (mirrored >= 1.0F) {
      *icoord = size - 1;
      return;
   }
   *icoord = util_ifloor(mirrored * size);
}
218
219
/**
 * Nearest-filter wrap function for the mirror-clamp-to-edge mode.
 * |s| is limited to [half a texel, 1 - half a texel]; the returned
 * index is limited to [0, size-1].
 */
static void
wrap_nearest_mirror_clamp_to_edge(float s, unsigned size, int *icoord)
{
   const float half_texel = 1.0F / (2.0F * size);
   const float upper = 1.0F - half_texel;
   const float mirrored = fabsf(s);

   if (mirrored < half_texel) {
      *icoord = 0;
   }
   else if (mirrored > upper) {
      *icoord = size - 1;
   }
   else {
      *icoord = util_ifloor(mirrored * size);
   }
}
235
236
/**
 * Nearest-filter wrap function for the mirror-clamp-to-border mode.
 *
 * The coordinate is mirrored with fabsf(), so it can never fall below
 * the (negative) lower border limit; only the upper limit needs a test.
 * The returned index is limited to [0, size], where size denotes a
 * border texel.
 *
 * \param s       the incoming texcoord
 * \param size    the texture image size
 * \param icoord  returns the integer texel index
 */
static void
wrap_nearest_mirror_clamp_to_border(float s, unsigned size, int *icoord)
{
   const float min = -1.0F / (2.0F * size);
   const float max = 1.0F - min;   /* half a texel beyond 1.0 */
   const float u = fabsf(s);

   if (u > max)
      *icoord = size;
   else
      *icoord = util_ifloor(u * size);
}
252
253
/**
 * Linear-filter wrap function for the repeat mode: compute the two
 * texel locations to blend and the weight between them.
 * \param s        the texcoord
 * \param size     the texture image size
 * \param icoord0  returns first texture index
 * \param icoord1  returns second texture index (icoord0 + 1, wrapped)
 * \param w        returns blend factor/weight between texture indices
 */
static void
wrap_linear_repeat(float s, unsigned size,
                   int *icoord0, int *icoord1, float *w)
{
   /* shift by half a texel so that texel centers land on integers */
   const float u = s * size - 0.5F;
   const int first = repeat(util_ifloor(u), size);

   *icoord0 = first;
   *icoord1 = repeat(first + 1, size);
   *w = frac(u);
}
273
274
/**
 * Linear-filter wrap function for the clamp mode.
 * s is clamped to [0,1] before conversion; the returned indices are not
 * clamped further, so icoord0 may be -1 and icoord1 may be size.
 */
static void
wrap_linear_clamp(float s, unsigned size,
                  int *icoord0, int *icoord1, float *w)
{
   const float clamped = CLAMP(s, 0.0F, 1.0F);
   const float u = clamped * size - 0.5f;
   const int first = util_ifloor(u);

   *icoord0 = first;
   *icoord1 = first + 1;
   *w = frac(u);
}
285
286
/**
 * Linear-filter wrap function for the clamp-to-edge mode.
 * s is clamped to [0,1] and both returned indices are forced into
 * [0, size-1].
 */
static void
wrap_linear_clamp_to_edge(float s, unsigned size,
                          int *icoord0, int *icoord1, float *w)
{
   const float clamped = CLAMP(s, 0.0F, 1.0F);
   const float u = clamped * size - 0.5f;
   int first = util_ifloor(u);
   int second = first + 1;

   if (first < 0)
      first = 0;
   if (second >= (int) size)
      second = size - 1;

   *icoord0 = first;
   *icoord1 = second;
   *w = frac(u);
}
301
302
/**
 * Linear-filter wrap function for the clamp-to-border mode.
 * s is clamped to half a texel outside [0,1] on either side; the
 * returned indices are not clamped further.
 */
static void
wrap_linear_clamp_to_border(float s, unsigned size,
                            int *icoord0, int *icoord1, float *w)
{
   const float lower = -1.0F / (2.0F * size);
   const float upper = 1.0F - lower;
   const float clamped = CLAMP(s, lower, upper);
   const float u = clamped * size - 0.5f;
   const int first = util_ifloor(u);

   *icoord0 = first;
   *icoord1 = first + 1;
   *w = frac(u);
}
315
316
/**
 * Linear-filter wrap function for the mirror-repeat mode.
 * The fractional part of s is flipped on odd repetitions, then both
 * returned indices are forced into [0, size-1].
 */
static void
wrap_linear_mirror_repeat(float s, unsigned size,
                          int *icoord0, int *icoord1, float *w)
{
   float u = frac(s);
   int first, second;

   /* odd repetitions run backwards */
   if (util_ifloor(s) & 1)
      u = 1.0F - u;
   u = u * size - 0.5F;

   first = util_ifloor(u);
   second = first + 1;
   if (first < 0)
      first = 0;
   if (second >= (int) size)
      second = size - 1;

   *icoord0 = first;
   *icoord1 = second;
   *w = frac(u);
}
334
335
/**
 * Linear-filter wrap function for the mirror-clamp mode.
 * |s| is limited to 1 before scaling to texel space; the returned
 * indices are not clamped further.
 */
static void
wrap_linear_mirror_clamp(float s, unsigned size,
                         int *icoord0, int *icoord1, float *w)
{
   const float mirrored = fabsf(s);
   const float scaled = (mirrored >= 1.0F) ? (float) size : mirrored * size;
   const float u = scaled - 0.5F;
   const int first = util_ifloor(u);

   *icoord0 = first;
   *icoord1 = first + 1;
   *w = frac(u);
}
350
351
/**
 * Linear-filter wrap function for the mirror-clamp-to-edge mode.
 * |s| is limited to 1 before scaling to texel space, and both returned
 * indices are forced into [0, size-1].
 */
static void
wrap_linear_mirror_clamp_to_edge(float s, unsigned size,
                                 int *icoord0, int *icoord1, float *w)
{
   const float mirrored = fabsf(s);
   const float scaled = (mirrored >= 1.0F) ? (float) size : mirrored * size;
   const float u = scaled - 0.5F;
   int first = util_ifloor(u);
   int second = first + 1;

   if (first < 0)
      first = 0;
   if (second >= (int) size)
      second = size - 1;

   *icoord0 = first;
   *icoord1 = second;
   *w = frac(u);
}
370
371
/**
 * Linear-filter wrap function for the mirror-clamp-to-border mode.
 *
 * The coordinate is mirrored with fabsf(), so it can never reach the
 * (negative) lower border limit; only the upper limit is applied.
 * The returned indices are not clamped further.
 *
 * \param s        the texcoord
 * \param size     the texture image size
 * \param icoord0  returns first texture index
 * \param icoord1  returns second texture index (icoord0 + 1)
 * \param w        returns blend factor/weight between texture indices
 */
static void
wrap_linear_mirror_clamp_to_border(float s, unsigned size,
                                   int *icoord0, int *icoord1, float *w)
{
   const float min = -1.0F / (2.0F * size);
   const float max = 1.0F - min;   /* half a texel beyond 1.0 */
   float u = fabsf(s);

   if (u >= max)
      u = max * size;
   else
      u *= size;
   u -= 0.5F;

   *icoord0 = util_ifloor(u);
   *icoord1 = *icoord0 + 1;
   *w = frac(u);
}
390
391
/**
 * PIPE_TEX_WRAP_CLAMP for nearest sampling, unnormalized coords.
 * The floored coordinate is clamped to [0, size-1].
 */
static void
wrap_nearest_unorm_clamp(float s, unsigned size, int *icoord)
{
   const int texel = util_ifloor(s);
   *icoord = CLAMP(texel, 0, (int) size-1);
}
401
402
/**
 * PIPE_TEX_WRAP_CLAMP_TO_BORDER for nearest sampling, unnormalized coords.
 * The coordinate is clamped to [-0.5, size + 0.5] and then floored.
 */
static void
wrap_nearest_unorm_clamp_to_border(float s, unsigned size, int *icoord)
{
   const float upper = (float) size + 0.5F;
   *icoord = util_ifloor( CLAMP(s, -0.5F, upper) );
}
411
412
/**
 * PIPE_TEX_WRAP_CLAMP_TO_EDGE for nearest sampling, unnormalized coords.
 * The coordinate is clamped to [0.5, size - 0.5] and then floored.
 */
static void
wrap_nearest_unorm_clamp_to_edge(float s, unsigned size, int *icoord)
{
   const float upper = (float) size - 0.5F;
   *icoord = util_ifloor( CLAMP(s, 0.5F, upper) );
}
421
422
/**
 * PIPE_TEX_WRAP_CLAMP for linear sampling, unnormalized coords.
 * Returns the two texel indices to blend and the weight between them.
 */
static void
wrap_linear_unorm_clamp(float s, unsigned size,
                        int *icoord0, int *icoord1, float *w)
{
   /* Not exactly what the spec says, but it matches NVIDIA output */
   const float u = CLAMP(s - 0.5F, 0.0f, (float) size - 1.0f);
   const int first = util_ifloor(u);

   *icoord0 = first;
   *icoord1 = first + 1;
   *w = frac(u);
}
436
437
/**
 * PIPE_TEX_WRAP_CLAMP_TO_BORDER for linear sampling, unnormalized coords.
 * The coordinate is clamped to [-0.5, size + 0.5]; the second index is
 * additionally limited to size-1.
 */
static void
wrap_linear_unorm_clamp_to_border(float s, unsigned size,
                                  int *icoord0, int *icoord1, float *w)
{
   const float u = CLAMP(s, -0.5F, (float) size + 0.5F) - 0.5F;
   const int first = util_ifloor(u);
   int second = first + 1;

   if (second > (int) size - 1)
      second = size - 1;

   *icoord0 = first;
   *icoord1 = second;
   *w = frac(u);
}
453
454
/**
 * PIPE_TEX_WRAP_CLAMP_TO_EDGE for linear sampling, unnormalized coords.
 * The coordinate is clamped to [0.5, size - 0.5]; the second index is
 * additionally limited to size-1.
 */
static void
wrap_linear_unorm_clamp_to_edge(float s, unsigned size,
                                int *icoord0, int *icoord1, float *w)
{
   const float u = CLAMP(s, +0.5F, (float) size - 0.5F) - 0.5F;
   const int first = util_ifloor(u);
   int second = first + 1;

   if (second > (int) size - 1)
      second = size - 1;

   *icoord0 = first;
   *icoord1 = second;
   *w = frac(u);
}
470
471
472 /**
473 * Do coordinate to array index conversion. For array textures.
474 */
475 static INLINE void
476 wrap_array_layer(float coord, unsigned size, int *layer)
477 {
478 int c = util_ifloor(coord + 0.5F);
479 *layer = CLAMP(c, 0, size - 1);
480 }
481
482
483 /**
484 * Examine the quad's texture coordinates to compute the partial
485 * derivatives w.r.t X and Y, then compute lambda (level of detail).
486 */
487 static float
488 compute_lambda_1d(const struct sp_sampler_variant *samp,
489 const float s[TGSI_QUAD_SIZE],
490 const float t[TGSI_QUAD_SIZE],
491 const float p[TGSI_QUAD_SIZE])
492 {
493 const struct pipe_resource *texture = samp->view->texture;
494 float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
495 float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]);
496 float rho = MAX2(dsdx, dsdy) * u_minify(texture->width0, samp->view->u.tex.first_level);
497
498 return util_fast_log2(rho);
499 }
500
501
502 static float
503 compute_lambda_2d(const struct sp_sampler_variant *samp,
504 const float s[TGSI_QUAD_SIZE],
505 const float t[TGSI_QUAD_SIZE],
506 const float p[TGSI_QUAD_SIZE])
507 {
508 const struct pipe_resource *texture = samp->view->texture;
509 float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
510 float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]);
511 float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
512 float dtdy = fabsf(t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]);
513 float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, samp->view->u.tex.first_level);
514 float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, samp->view->u.tex.first_level);
515 float rho = MAX2(maxx, maxy);
516
517 return util_fast_log2(rho);
518 }
519
520
521 static float
522 compute_lambda_3d(const struct sp_sampler_variant *samp,
523 const float s[TGSI_QUAD_SIZE],
524 const float t[TGSI_QUAD_SIZE],
525 const float p[TGSI_QUAD_SIZE])
526 {
527 const struct pipe_resource *texture = samp->view->texture;
528 float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
529 float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]);
530 float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
531 float dtdy = fabsf(t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]);
532 float dpdx = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]);
533 float dpdy = fabsf(p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT]);
534 float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, samp->view->u.tex.first_level);
535 float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, samp->view->u.tex.first_level);
536 float maxz = MAX2(dpdx, dpdy) * u_minify(texture->depth0, samp->view->u.tex.first_level);
537 float rho;
538
539 rho = MAX2(maxx, maxy);
540 rho = MAX2(rho, maxz);
541
542 return util_fast_log2(rho);
543 }
544
545
546 /**
547 * Compute lambda for a vertex texture sampler.
548 * Since there aren't derivatives to use, just return 0.
549 */
550 static float
551 compute_lambda_vert(const struct sp_sampler_variant *samp,
552 const float s[TGSI_QUAD_SIZE],
553 const float t[TGSI_QUAD_SIZE],
554 const float p[TGSI_QUAD_SIZE])
555 {
556 return 0.0f;
557 }
558
559
560
561 /**
562 * Get a texel from a texture, using the texture tile cache.
563 *
564 * \param addr the template tex address containing cube, z, face info.
565 * \param x the x coord of texel within 2D image
566 * \param y the y coord of texel within 2D image
567 * \param rgba the quad to put the texel/color into
568 *
569 * XXX maybe move this into sp_tex_tile_cache.c and merge with the
570 * sp_get_cached_tile_tex() function.
571 */
572
573
574
575
576 static INLINE const float *
577 get_texel_2d_no_border(const struct sp_sampler_variant *samp,
578 union tex_tile_address addr, int x, int y)
579 {
580 const struct softpipe_tex_cached_tile *tile;
581
582 addr.bits.x = x / TILE_SIZE;
583 addr.bits.y = y / TILE_SIZE;
584 y %= TILE_SIZE;
585 x %= TILE_SIZE;
586
587 tile = sp_get_cached_tile_tex(samp->cache, addr);
588
589 return &tile->data.color[y][x][0];
590 }
591
592
593 static INLINE const float *
594 get_texel_2d(const struct sp_sampler_variant *samp,
595 union tex_tile_address addr, int x, int y)
596 {
597 const struct pipe_resource *texture = samp->view->texture;
598 unsigned level = addr.bits.level;
599
600 if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
601 y < 0 || y >= (int) u_minify(texture->height0, level)) {
602 return samp->sampler->border_color.f;
603 }
604 else {
605 return get_texel_2d_no_border( samp, addr, x, y );
606 }
607 }
608
609 /*
610 * seamless cubemap neighbour array.
611 * this array is used to find the adjacent face in each of 4 directions,
612 * left, right, up, down. (or -x, +x, -y, +y).
613 */
614 static const unsigned face_array[PIPE_TEX_FACE_MAX][4] = {
615 /* pos X first then neg X is Z different, Y the same */
616 /* PIPE_TEX_FACE_POS_X,*/
617 { PIPE_TEX_FACE_POS_Z, PIPE_TEX_FACE_NEG_Z,
618 PIPE_TEX_FACE_NEG_Y, PIPE_TEX_FACE_POS_Y },
619 /* PIPE_TEX_FACE_NEG_X */
620 { PIPE_TEX_FACE_NEG_Z, PIPE_TEX_FACE_POS_Z,
621 PIPE_TEX_FACE_NEG_Y, PIPE_TEX_FACE_POS_Y },
622
623 /* pos Y first then neg Y is X different, X the same */
624 /* PIPE_TEX_FACE_POS_Y */
625 { PIPE_TEX_FACE_NEG_X, PIPE_TEX_FACE_POS_X,
626 PIPE_TEX_FACE_POS_Z, PIPE_TEX_FACE_NEG_Z },
627
628 /* PIPE_TEX_FACE_NEG_Y */
629 { PIPE_TEX_FACE_NEG_X, PIPE_TEX_FACE_POS_X,
630 PIPE_TEX_FACE_NEG_Z, PIPE_TEX_FACE_POS_Z },
631
632 /* pos Z first then neg Y is X different, X the same */
633 /* PIPE_TEX_FACE_POS_Z */
634 { PIPE_TEX_FACE_NEG_X, PIPE_TEX_FACE_POS_X,
635 PIPE_TEX_FACE_NEG_Y, PIPE_TEX_FACE_POS_Y },
636
637 /* PIPE_TEX_FACE_NEG_Z */
638 { PIPE_TEX_FACE_POS_X, PIPE_TEX_FACE_NEG_X,
639 PIPE_TEX_FACE_NEG_Y, PIPE_TEX_FACE_POS_Y }
640 };
641
642 static INLINE unsigned
643 get_next_face(unsigned face, int x, int y)
644 {
645 int idx = 0;
646
647 if (x == 0 && y == 0)
648 return face;
649 if (x == -1)
650 idx = 0;
651 else if (x == 1)
652 idx = 1;
653 else if (y == -1)
654 idx = 2;
655 else if (y == 1)
656 idx = 3;
657
658 return face_array[face][idx];
659 }
660
661 static INLINE const float *
662 get_texel_cube_seamless(const struct sp_sampler_variant *samp,
663 union tex_tile_address addr, int x, int y,
664 float *corner)
665 {
666 const struct pipe_resource *texture = samp->view->texture;
667 unsigned level = addr.bits.level;
668 unsigned face = addr.bits.face;
669 int new_x, new_y;
670 int max_x, max_y;
671 int c;
672
673 max_x = (int) u_minify(texture->width0, level);
674 max_y = (int) u_minify(texture->height0, level);
675 new_x = x;
676 new_y = y;
677
678 /* the corner case */
679 if ((x < 0 || x >= max_x) &&
680 (y < 0 || y >= max_y)) {
681 const float *c1, *c2, *c3;
682 int fx = x < 0 ? 0 : max_x - 1;
683 int fy = y < 0 ? 0 : max_y - 1;
684 c1 = get_texel_2d_no_border( samp, addr, fx, fy);
685 addr.bits.face = get_next_face(face, (x < 0) ? -1 : 1, 0);
686 c2 = get_texel_2d_no_border( samp, addr, (x < 0) ? max_x - 1 : 0, fy);
687 addr.bits.face = get_next_face(face, 0, (y < 0) ? -1 : 1);
688 c3 = get_texel_2d_no_border( samp, addr, fx, (y < 0) ? max_y - 1 : 0);
689 for (c = 0; c < TGSI_QUAD_SIZE; c++)
690 corner[c] = CLAMP((c1[c] + c2[c] + c3[c]), 0.0F, 1.0F) / 3;
691
692 return corner;
693 }
694 /* change the face */
695 if (x < 0) {
696 new_x = max_x - 1;
697 face = get_next_face(face, -1, 0);
698 } else if (x >= max_x) {
699 new_x = 0;
700 face = get_next_face(face, 1, 0);
701 } else if (y < 0) {
702 new_y = max_y - 1;
703 face = get_next_face(face, 0, -1);
704 } else if (y >= max_y) {
705 new_y = 0;
706 face = get_next_face(face, 0, 1);
707 }
708
709 addr.bits.face = face;
710 return get_texel_2d_no_border( samp, addr, new_x, new_y );
711 }
712
713 /* Gather a quad of adjacent texels within a tile:
714 */
715 static INLINE void
716 get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_variant *samp,
717 union tex_tile_address addr,
718 unsigned x, unsigned y,
719 const float *out[4])
720 {
721 const struct softpipe_tex_cached_tile *tile;
722
723 addr.bits.x = x / TILE_SIZE;
724 addr.bits.y = y / TILE_SIZE;
725 y %= TILE_SIZE;
726 x %= TILE_SIZE;
727
728 tile = sp_get_cached_tile_tex(samp->cache, addr);
729
730 out[0] = &tile->data.color[y ][x ][0];
731 out[1] = &tile->data.color[y ][x+1][0];
732 out[2] = &tile->data.color[y+1][x ][0];
733 out[3] = &tile->data.color[y+1][x+1][0];
734 }
735
736
737 /* Gather a quad of potentially non-adjacent texels:
738 */
739 static INLINE void
740 get_texel_quad_2d_no_border(const struct sp_sampler_variant *samp,
741 union tex_tile_address addr,
742 int x0, int y0,
743 int x1, int y1,
744 const float *out[4])
745 {
746 out[0] = get_texel_2d_no_border( samp, addr, x0, y0 );
747 out[1] = get_texel_2d_no_border( samp, addr, x1, y0 );
748 out[2] = get_texel_2d_no_border( samp, addr, x0, y1 );
749 out[3] = get_texel_2d_no_border( samp, addr, x1, y1 );
750 }
751
752 /* Can involve a lot of unnecessary checks for border color:
753 */
754 static INLINE void
755 get_texel_quad_2d(const struct sp_sampler_variant *samp,
756 union tex_tile_address addr,
757 int x0, int y0,
758 int x1, int y1,
759 const float *out[4])
760 {
761 out[0] = get_texel_2d( samp, addr, x0, y0 );
762 out[1] = get_texel_2d( samp, addr, x1, y0 );
763 out[3] = get_texel_2d( samp, addr, x1, y1 );
764 out[2] = get_texel_2d( samp, addr, x0, y1 );
765 }
766
767
768
769 /* 3d variants:
770 */
771 static INLINE const float *
772 get_texel_3d_no_border(const struct sp_sampler_variant *samp,
773 union tex_tile_address addr, int x, int y, int z)
774 {
775 const struct softpipe_tex_cached_tile *tile;
776
777 addr.bits.x = x / TILE_SIZE;
778 addr.bits.y = y / TILE_SIZE;
779 addr.bits.z = z;
780 y %= TILE_SIZE;
781 x %= TILE_SIZE;
782
783 tile = sp_get_cached_tile_tex(samp->cache, addr);
784
785 return &tile->data.color[y][x][0];
786 }
787
788
789 static INLINE const float *
790 get_texel_3d(const struct sp_sampler_variant *samp,
791 union tex_tile_address addr, int x, int y, int z)
792 {
793 const struct pipe_resource *texture = samp->view->texture;
794 unsigned level = addr.bits.level;
795
796 if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
797 y < 0 || y >= (int) u_minify(texture->height0, level) ||
798 z < 0 || z >= (int) u_minify(texture->depth0, level)) {
799 return samp->sampler->border_color.f;
800 }
801 else {
802 return get_texel_3d_no_border( samp, addr, x, y, z );
803 }
804 }
805
806
807 /* Get texel pointer for 1D array texture */
808 static INLINE const float *
809 get_texel_1d_array(const struct sp_sampler_variant *samp,
810 union tex_tile_address addr, int x, int y)
811 {
812 const struct pipe_resource *texture = samp->view->texture;
813 unsigned level = addr.bits.level;
814
815 if (x < 0 || x >= (int) u_minify(texture->width0, level)) {
816 return samp->sampler->border_color.f;
817 }
818 else {
819 return get_texel_2d_no_border(samp, addr, x, y);
820 }
821 }
822
823
824 /* Get texel pointer for 2D array texture */
825 static INLINE const float *
826 get_texel_2d_array(const struct sp_sampler_variant *samp,
827 union tex_tile_address addr, int x, int y, int layer)
828 {
829 const struct pipe_resource *texture = samp->view->texture;
830 unsigned level = addr.bits.level;
831
832 assert(layer < (int) texture->array_size);
833 assert(layer >= 0);
834
835 if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
836 y < 0 || y >= (int) u_minify(texture->height0, level)) {
837 return samp->sampler->border_color.f;
838 }
839 else {
840 return get_texel_3d_no_border(samp, addr, x, y, layer);
841 }
842 }
843
844
845 /* Get texel pointer for cube array texture */
846 static INLINE const float *
847 get_texel_cube_array(const struct sp_sampler_variant *samp,
848 union tex_tile_address addr, int x, int y, int layer)
849 {
850 const struct pipe_resource *texture = samp->view->texture;
851 unsigned level = addr.bits.level;
852
853 assert(layer < (int) texture->array_size);
854 assert(layer >= 0);
855
856 if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
857 y < 0 || y >= (int) u_minify(texture->height0, level)) {
858 return samp->sampler->border_color.f;
859 }
860 else {
861 return get_texel_3d_no_border(samp, addr, x, y, layer);
862 }
863 }
/**
 * Given the logbase2 of a mipmap's base level size and a mipmap level,
 * return the size (in texels) of that mipmap level.
 * For example, if level[0].width = 256 then base_pot will be 8.
 * If level = 2, then we'll return 64 (the width at level=2).
 * Return 1 if level > base_pot.
 *
 * The shift is done on an unsigned constant so that a difference of 31
 * is well defined; differences of 32 or more are still not supported.
 */
static INLINE unsigned
pot_level_size(unsigned base_pot, unsigned level)
{
   if (base_pot < level)
      return 1;
   return 1u << (base_pot - level);
}
876
877
878 static void
879 print_sample(const char *function, const float *rgba)
880 {
881 debug_printf("%s %g %g %g %g\n",
882 function,
883 rgba[0], rgba[TGSI_NUM_CHANNELS], rgba[2*TGSI_NUM_CHANNELS], rgba[3*TGSI_NUM_CHANNELS]);
884 }
885
886
887 static void
888 print_sample_4(const char *function, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
889 {
890 debug_printf("%s %g %g %g %g, %g %g %g %g, %g %g %g %g, %g %g %g %g\n",
891 function,
892 rgba[0][0], rgba[1][0], rgba[2][0], rgba[3][0],
893 rgba[0][1], rgba[1][1], rgba[2][1], rgba[3][1],
894 rgba[0][2], rgba[1][2], rgba[2][2], rgba[3][2],
895 rgba[0][3], rgba[1][3], rgba[2][3], rgba[3][3]);
896 }
897
898 /* Some image-filter fastpaths:
899 */
900 static INLINE void
901 img_filter_2d_linear_repeat_POT(struct sp_sampler_variant *samp,
902 float s,
903 float t,
904 float p,
905 unsigned level,
906 unsigned face_id,
907 float *rgba)
908 {
909 unsigned xpot = pot_level_size(samp->xpot, level);
910 unsigned ypot = pot_level_size(samp->ypot, level);
911 unsigned xmax = (xpot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, xpot) - 1; */
912 unsigned ymax = (ypot - 1) & (TILE_SIZE - 1); /* MIN2(TILE_SIZE, ypot) - 1; */
913 union tex_tile_address addr;
914 int c;
915
916 float u = s * xpot - 0.5F;
917 float v = t * ypot - 0.5F;
918
919 int uflr = util_ifloor(u);
920 int vflr = util_ifloor(v);
921
922 float xw = u - (float)uflr;
923 float yw = v - (float)vflr;
924
925 int x0 = uflr & (xpot - 1);
926 int y0 = vflr & (ypot - 1);
927
928 const float *tx[4];
929
930 addr.value = 0;
931 addr.bits.level = level;
932
933 /* Can we fetch all four at once:
934 */
935 if (x0 < xmax && y0 < ymax) {
936 get_texel_quad_2d_no_border_single_tile(samp, addr, x0, y0, tx);
937 }
938 else {
939 unsigned x1 = (x0 + 1) & (xpot - 1);
940 unsigned y1 = (y0 + 1) & (ypot - 1);
941 get_texel_quad_2d_no_border(samp, addr, x0, y0, x1, y1, tx);
942 }
943
944 /* interpolate R, G, B, A */
945 for (c = 0; c < TGSI_QUAD_SIZE; c++) {
946 rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
947 tx[0][c], tx[1][c],
948 tx[2][c], tx[3][c]);
949 }
950
951 if (DEBUG_TEX) {
952 print_sample(__FUNCTION__, rgba);
953 }
954 }
955
956
957 static INLINE void
958 img_filter_2d_nearest_repeat_POT(struct sp_sampler_variant *samp,
959 float s,
960 float t,
961 float p,
962 unsigned level,
963 unsigned face_id,
964 float rgba[TGSI_QUAD_SIZE])
965 {
966 unsigned xpot = pot_level_size(samp->xpot, level);
967 unsigned ypot = pot_level_size(samp->ypot, level);
968 const float *out;
969 union tex_tile_address addr;
970 int c;
971
972 float u = s * xpot;
973 float v = t * ypot;
974
975 int uflr = util_ifloor(u);
976 int vflr = util_ifloor(v);
977
978 int x0 = uflr & (xpot - 1);
979 int y0 = vflr & (ypot - 1);
980
981 addr.value = 0;
982 addr.bits.level = level;
983
984 out = get_texel_2d_no_border(samp, addr, x0, y0);
985 for (c = 0; c < TGSI_QUAD_SIZE; c++)
986 rgba[TGSI_NUM_CHANNELS*c] = out[c];
987
988 if (DEBUG_TEX) {
989 print_sample(__FUNCTION__, rgba);
990 }
991 }
992
993
994 static INLINE void
995 img_filter_2d_nearest_clamp_POT(struct sp_sampler_variant *samp,
996 float s,
997 float t,
998 float p,
999 unsigned level,
1000 unsigned face_id,
1001 float rgba[TGSI_QUAD_SIZE])
1002 {
1003 unsigned xpot = pot_level_size(samp->xpot, level);
1004 unsigned ypot = pot_level_size(samp->ypot, level);
1005 union tex_tile_address addr;
1006 int c;
1007
1008 float u = s * xpot;
1009 float v = t * ypot;
1010
1011 int x0, y0;
1012 const float *out;
1013
1014 addr.value = 0;
1015 addr.bits.level = level;
1016
1017 x0 = util_ifloor(u);
1018 if (x0 < 0)
1019 x0 = 0;
1020 else if (x0 > xpot - 1)
1021 x0 = xpot - 1;
1022
1023 y0 = util_ifloor(v);
1024 if (y0 < 0)
1025 y0 = 0;
1026 else if (y0 > ypot - 1)
1027 y0 = ypot - 1;
1028
1029 out = get_texel_2d_no_border(samp, addr, x0, y0);
1030 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1031 rgba[TGSI_NUM_CHANNELS*c] = out[c];
1032
1033 if (DEBUG_TEX) {
1034 print_sample(__FUNCTION__, rgba);
1035 }
1036 }
1037
1038
1039 static void
1040 img_filter_1d_nearest(struct sp_sampler_variant *samp,
1041 float s,
1042 float t,
1043 float p,
1044 unsigned level,
1045 unsigned face_id,
1046 float rgba[TGSI_QUAD_SIZE])
1047 {
1048 const struct pipe_resource *texture = samp->view->texture;
1049 int width;
1050 int x;
1051 union tex_tile_address addr;
1052 const float *out;
1053 int c;
1054
1055 width = u_minify(texture->width0, level);
1056
1057 assert(width > 0);
1058
1059 addr.value = 0;
1060 addr.bits.level = level;
1061
1062 samp->nearest_texcoord_s(s, width, &x);
1063
1064 out = get_texel_2d(samp, addr, x, 0);
1065 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1066 rgba[TGSI_NUM_CHANNELS*c] = out[c];
1067
1068 if (DEBUG_TEX) {
1069 print_sample(__FUNCTION__, rgba);
1070 }
1071 }
1072
1073
1074 static void
1075 img_filter_1d_array_nearest(struct sp_sampler_variant *samp,
1076 float s,
1077 float t,
1078 float p,
1079 unsigned level,
1080 unsigned face_id,
1081 float *rgba)
1082 {
1083 const struct pipe_resource *texture = samp->view->texture;
1084 int width;
1085 int x, layer;
1086 union tex_tile_address addr;
1087 const float *out;
1088 int c;
1089
1090 width = u_minify(texture->width0, level);
1091
1092 assert(width > 0);
1093
1094 addr.value = 0;
1095 addr.bits.level = level;
1096
1097 samp->nearest_texcoord_s(s, width, &x);
1098 wrap_array_layer(t, texture->array_size, &layer);
1099
1100 out = get_texel_1d_array(samp, addr, x, layer);
1101 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1102 rgba[TGSI_NUM_CHANNELS*c] = out[c];
1103
1104 if (DEBUG_TEX) {
1105 print_sample(__FUNCTION__, rgba);
1106 }
1107 }
1108
1109
1110 static void
1111 img_filter_2d_nearest(struct sp_sampler_variant *samp,
1112 float s,
1113 float t,
1114 float p,
1115 unsigned level,
1116 unsigned face_id,
1117 float *rgba)
1118 {
1119 const struct pipe_resource *texture = samp->view->texture;
1120 int width, height;
1121 int x, y;
1122 union tex_tile_address addr;
1123 const float *out;
1124 int c;
1125
1126 width = u_minify(texture->width0, level);
1127 height = u_minify(texture->height0, level);
1128
1129 assert(width > 0);
1130 assert(height > 0);
1131
1132 addr.value = 0;
1133 addr.bits.level = level;
1134
1135 samp->nearest_texcoord_s(s, width, &x);
1136 samp->nearest_texcoord_t(t, height, &y);
1137
1138 out = get_texel_2d(samp, addr, x, y);
1139 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1140 rgba[TGSI_NUM_CHANNELS*c] = out[c];
1141
1142 if (DEBUG_TEX) {
1143 print_sample(__FUNCTION__, rgba);
1144 }
1145 }
1146
1147
1148 static void
1149 img_filter_2d_array_nearest(struct sp_sampler_variant *samp,
1150 float s,
1151 float t,
1152 float p,
1153 unsigned level,
1154 unsigned face_id,
1155 float *rgba)
1156 {
1157 const struct pipe_resource *texture = samp->view->texture;
1158 int width, height;
1159 int x, y, layer;
1160 union tex_tile_address addr;
1161 const float *out;
1162 int c;
1163
1164 width = u_minify(texture->width0, level);
1165 height = u_minify(texture->height0, level);
1166
1167 assert(width > 0);
1168 assert(height > 0);
1169
1170 addr.value = 0;
1171 addr.bits.level = level;
1172
1173 samp->nearest_texcoord_s(s, width, &x);
1174 samp->nearest_texcoord_t(t, height, &y);
1175 wrap_array_layer(p, texture->array_size, &layer);
1176
1177 out = get_texel_2d_array(samp, addr, x, y, layer);
1178 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1179 rgba[TGSI_NUM_CHANNELS*c] = out[c];
1180
1181 if (DEBUG_TEX) {
1182 print_sample(__FUNCTION__, rgba);
1183 }
1184 }
1185
1186
1187 static INLINE union tex_tile_address
1188 face(union tex_tile_address addr, unsigned face )
1189 {
1190 addr.bits.face = face;
1191 return addr;
1192 }
1193
1194
1195 static void
1196 img_filter_cube_nearest(struct sp_sampler_variant *samp,
1197 float s,
1198 float t,
1199 float p,
1200 unsigned level,
1201 unsigned face_id,
1202 float *rgba)
1203 {
1204 const struct pipe_resource *texture = samp->view->texture;
1205 int width, height;
1206 int x, y;
1207 union tex_tile_address addr;
1208 const float *out;
1209 int c;
1210
1211 width = u_minify(texture->width0, level);
1212 height = u_minify(texture->height0, level);
1213
1214 assert(width > 0);
1215 assert(height > 0);
1216
1217 addr.value = 0;
1218 addr.bits.level = level;
1219
1220 /*
1221 * If NEAREST filtering is done within a miplevel, always apply wrap
1222 * mode CLAMP_TO_EDGE.
1223 */
1224 if (samp->sampler->seamless_cube_map) {
1225 wrap_nearest_clamp_to_edge(s, width, &x);
1226 wrap_nearest_clamp_to_edge(t, height, &y);
1227 } else {
1228 samp->nearest_texcoord_s(s, width, &x);
1229 samp->nearest_texcoord_t(t, height, &y);
1230 }
1231
1232 out = get_texel_2d(samp, face(addr, face_id), x, y);
1233 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1234 rgba[TGSI_NUM_CHANNELS*c] = out[c];
1235
1236 if (DEBUG_TEX) {
1237 print_sample(__FUNCTION__, rgba);
1238 }
1239 }
1240
1241 static void
1242 img_filter_cube_array_nearest(struct sp_sampler_variant *samp,
1243 float s,
1244 float t,
1245 float p,
1246 unsigned level,
1247 unsigned face_id,
1248 float *rgba)
1249 {
1250 const struct pipe_resource *texture = samp->view->texture;
1251 int width, height;
1252 int x, y, layer;
1253 union tex_tile_address addr;
1254 const float *out;
1255 int c;
1256
1257 width = u_minify(texture->width0, level);
1258 height = u_minify(texture->height0, level);
1259
1260 assert(width > 0);
1261 assert(height > 0);
1262
1263 addr.value = 0;
1264 addr.bits.level = level;
1265
1266 samp->nearest_texcoord_s(s, width, &x);
1267 samp->nearest_texcoord_t(t, height, &y);
1268 wrap_array_layer(p, texture->array_size, &layer);
1269
1270 out = get_texel_cube_array(samp, addr, x, y, layer * 6 + face_id);
1271 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1272 rgba[TGSI_NUM_CHANNELS*c] = out[c];
1273
1274 if (DEBUG_TEX) {
1275 print_sample(__FUNCTION__, rgba);
1276 }
1277 }
1278
1279 static void
1280 img_filter_3d_nearest(struct sp_sampler_variant *samp,
1281 float s,
1282 float t,
1283 float p,
1284 unsigned level,
1285 unsigned face_id,
1286 float *rgba)
1287 {
1288 const struct pipe_resource *texture = samp->view->texture;
1289 int width, height, depth;
1290 int x, y, z;
1291 union tex_tile_address addr;
1292 const float *out;
1293 int c;
1294
1295 width = u_minify(texture->width0, level);
1296 height = u_minify(texture->height0, level);
1297 depth = u_minify(texture->depth0, level);
1298
1299 assert(width > 0);
1300 assert(height > 0);
1301 assert(depth > 0);
1302
1303 samp->nearest_texcoord_s(s, width, &x);
1304 samp->nearest_texcoord_t(t, height, &y);
1305 samp->nearest_texcoord_p(p, depth, &z);
1306
1307 addr.value = 0;
1308 addr.bits.level = level;
1309
1310 out = get_texel_3d(samp, addr, x, y, z);
1311 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1312 rgba[TGSI_NUM_CHANNELS*c] = out[c];
1313 }
1314
1315
1316 static void
1317 img_filter_1d_linear(struct sp_sampler_variant *samp,
1318 float s,
1319 float t,
1320 float p,
1321 unsigned level,
1322 unsigned face_id,
1323 float *rgba)
1324 {
1325 const struct pipe_resource *texture = samp->view->texture;
1326 int width;
1327 int x0, x1;
1328 float xw; /* weights */
1329 union tex_tile_address addr;
1330 const float *tx0, *tx1;
1331 int c;
1332
1333 width = u_minify(texture->width0, level);
1334
1335 assert(width > 0);
1336
1337 addr.value = 0;
1338 addr.bits.level = level;
1339
1340 samp->linear_texcoord_s(s, width, &x0, &x1, &xw);
1341
1342 tx0 = get_texel_2d(samp, addr, x0, 0);
1343 tx1 = get_texel_2d(samp, addr, x1, 0);
1344
1345 /* interpolate R, G, B, A */
1346 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1347 rgba[TGSI_NUM_CHANNELS*c] = lerp(xw, tx0[c], tx1[c]);
1348 }
1349
1350
1351 static void
1352 img_filter_1d_array_linear(struct sp_sampler_variant *samp,
1353 float s,
1354 float t,
1355 float p,
1356 unsigned level,
1357 unsigned face_id,
1358 float *rgba)
1359 {
1360 const struct pipe_resource *texture = samp->view->texture;
1361 int width;
1362 int x0, x1, layer;
1363 float xw; /* weights */
1364 union tex_tile_address addr;
1365 const float *tx0, *tx1;
1366 int c;
1367
1368 width = u_minify(texture->width0, level);
1369
1370 assert(width > 0);
1371
1372 addr.value = 0;
1373 addr.bits.level = level;
1374
1375 samp->linear_texcoord_s(s, width, &x0, &x1, &xw);
1376 wrap_array_layer(t, texture->array_size, &layer);
1377
1378 tx0 = get_texel_1d_array(samp, addr, x0, layer);
1379 tx1 = get_texel_1d_array(samp, addr, x1, layer);
1380
1381 /* interpolate R, G, B, A */
1382 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1383 rgba[TGSI_NUM_CHANNELS*c] = lerp(xw, tx0[c], tx1[c]);
1384 }
1385
1386
1387 static void
1388 img_filter_2d_linear(struct sp_sampler_variant *samp,
1389 float s,
1390 float t,
1391 float p,
1392 unsigned level,
1393 unsigned face_id,
1394 float *rgba)
1395 {
1396 const struct pipe_resource *texture = samp->view->texture;
1397 int width, height;
1398 int x0, y0, x1, y1;
1399 float xw, yw; /* weights */
1400 union tex_tile_address addr;
1401 const float *tx0, *tx1, *tx2, *tx3;
1402 int c;
1403
1404 width = u_minify(texture->width0, level);
1405 height = u_minify(texture->height0, level);
1406
1407 assert(width > 0);
1408 assert(height > 0);
1409
1410 addr.value = 0;
1411 addr.bits.level = level;
1412
1413 samp->linear_texcoord_s(s, width, &x0, &x1, &xw);
1414 samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
1415
1416 tx0 = get_texel_2d(samp, addr, x0, y0);
1417 tx1 = get_texel_2d(samp, addr, x1, y0);
1418 tx2 = get_texel_2d(samp, addr, x0, y1);
1419 tx3 = get_texel_2d(samp, addr, x1, y1);
1420
1421 /* interpolate R, G, B, A */
1422 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1423 rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1424 tx0[c], tx1[c],
1425 tx2[c], tx3[c]);
1426 }
1427
1428
1429 static void
1430 img_filter_2d_array_linear(struct sp_sampler_variant *samp,
1431 float s,
1432 float t,
1433 float p,
1434 unsigned level,
1435 unsigned face_id,
1436 float *rgba)
1437 {
1438 const struct pipe_resource *texture = samp->view->texture;
1439 int width, height;
1440 int x0, y0, x1, y1, layer;
1441 float xw, yw; /* weights */
1442 union tex_tile_address addr;
1443 const float *tx0, *tx1, *tx2, *tx3;
1444 int c;
1445
1446 width = u_minify(texture->width0, level);
1447 height = u_minify(texture->height0, level);
1448
1449 assert(width > 0);
1450 assert(height > 0);
1451
1452 addr.value = 0;
1453 addr.bits.level = level;
1454
1455 samp->linear_texcoord_s(s, width, &x0, &x1, &xw);
1456 samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
1457 wrap_array_layer(p, texture->array_size, &layer);
1458
1459 tx0 = get_texel_2d_array(samp, addr, x0, y0, layer);
1460 tx1 = get_texel_2d_array(samp, addr, x1, y0, layer);
1461 tx2 = get_texel_2d_array(samp, addr, x0, y1, layer);
1462 tx3 = get_texel_2d_array(samp, addr, x1, y1, layer);
1463
1464 /* interpolate R, G, B, A */
1465 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1466 rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1467 tx0[c], tx1[c],
1468 tx2[c], tx3[c]);
1469 }
1470
1471
1472 static void
1473 img_filter_cube_linear(struct sp_sampler_variant *samp,
1474 float s,
1475 float t,
1476 float p,
1477 unsigned level,
1478 unsigned face_id,
1479 float *rgba)
1480 {
1481 const struct pipe_resource *texture = samp->view->texture;
1482 int width, height;
1483 int x0, y0, x1, y1;
1484 float xw, yw; /* weights */
1485 union tex_tile_address addr, addrj;
1486 const float *tx0, *tx1, *tx2, *tx3;
1487 float corner0[TGSI_QUAD_SIZE], corner1[TGSI_QUAD_SIZE], corner2[TGSI_QUAD_SIZE], corner3[TGSI_QUAD_SIZE];
1488 int c;
1489
1490 width = u_minify(texture->width0, level);
1491 height = u_minify(texture->height0, level);
1492
1493 assert(width > 0);
1494 assert(height > 0);
1495
1496 addr.value = 0;
1497 addr.bits.level = level;
1498
1499 /*
1500 * For seamless if LINEAR filtering is done within a miplevel,
1501 * always apply wrap mode CLAMP_TO_BORDER.
1502 */
1503 if (samp->sampler->seamless_cube_map) {
1504 wrap_linear_clamp_to_border(s, width, &x0, &x1, &xw);
1505 wrap_linear_clamp_to_border(t, height, &y0, &y1, &yw);
1506 } else {
1507 samp->linear_texcoord_s(s, width, &x0, &x1, &xw);
1508 samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
1509 }
1510
1511 addrj = face(addr, face_id);
1512
1513 if (samp->sampler->seamless_cube_map) {
1514 tx0 = get_texel_cube_seamless(samp, addrj, x0, y0, corner0);
1515 tx1 = get_texel_cube_seamless(samp, addrj, x1, y0, corner1);
1516 tx2 = get_texel_cube_seamless(samp, addrj, x0, y1, corner2);
1517 tx3 = get_texel_cube_seamless(samp, addrj, x1, y1, corner3);
1518 } else {
1519 tx0 = get_texel_2d(samp, addrj, x0, y0);
1520 tx1 = get_texel_2d(samp, addrj, x1, y0);
1521 tx2 = get_texel_2d(samp, addrj, x0, y1);
1522 tx3 = get_texel_2d(samp, addrj, x1, y1);
1523 }
1524 /* interpolate R, G, B, A */
1525 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1526 rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1527 tx0[c], tx1[c],
1528 tx2[c], tx3[c]);
1529 }
1530
1531
1532 static void
1533 img_filter_cube_array_linear(struct sp_sampler_variant *samp,
1534 float s,
1535 float t,
1536 float p,
1537 unsigned level,
1538 unsigned face_id,
1539 float *rgba)
1540 {
1541 const struct pipe_resource *texture = samp->view->texture;
1542 int width, height;
1543 int x0, y0, x1, y1, layer;
1544 float xw, yw; /* weights */
1545 union tex_tile_address addr;
1546 const float *tx0, *tx1, *tx2, *tx3;
1547 int c;
1548
1549 width = u_minify(texture->width0, level);
1550 height = u_minify(texture->height0, level);
1551
1552 assert(width > 0);
1553 assert(height > 0);
1554
1555 addr.value = 0;
1556 addr.bits.level = level;
1557
1558 samp->linear_texcoord_s(s, width, &x0, &x1, &xw);
1559 samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
1560 wrap_array_layer(p, texture->array_size, &layer);
1561
1562 tx0 = get_texel_cube_array(samp, addr, x0, y0, layer * 6 + face_id);
1563 tx1 = get_texel_cube_array(samp, addr, x1, y0, layer * 6 + face_id);
1564 tx2 = get_texel_cube_array(samp, addr, x0, y1, layer * 6 + face_id);
1565 tx3 = get_texel_cube_array(samp, addr, x1, y1, layer * 6 + face_id);
1566
1567 /* interpolate R, G, B, A */
1568 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1569 rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1570 tx0[c], tx1[c],
1571 tx2[c], tx3[c]);
1572 }
1573
1574 static void
1575 img_filter_3d_linear(struct sp_sampler_variant *samp,
1576 float s,
1577 float t,
1578 float p,
1579 unsigned level,
1580 unsigned face_id,
1581 float *rgba)
1582 {
1583 const struct pipe_resource *texture = samp->view->texture;
1584 int width, height, depth;
1585 int x0, x1, y0, y1, z0, z1;
1586 float xw, yw, zw; /* interpolation weights */
1587 union tex_tile_address addr;
1588 const float *tx00, *tx01, *tx02, *tx03, *tx10, *tx11, *tx12, *tx13;
1589 int c;
1590
1591 width = u_minify(texture->width0, level);
1592 height = u_minify(texture->height0, level);
1593 depth = u_minify(texture->depth0, level);
1594
1595 addr.value = 0;
1596 addr.bits.level = level;
1597
1598 assert(width > 0);
1599 assert(height > 0);
1600 assert(depth > 0);
1601
1602 samp->linear_texcoord_s(s, width, &x0, &x1, &xw);
1603 samp->linear_texcoord_t(t, height, &y0, &y1, &yw);
1604 samp->linear_texcoord_p(p, depth, &z0, &z1, &zw);
1605
1606
1607 tx00 = get_texel_3d(samp, addr, x0, y0, z0);
1608 tx01 = get_texel_3d(samp, addr, x1, y0, z0);
1609 tx02 = get_texel_3d(samp, addr, x0, y1, z0);
1610 tx03 = get_texel_3d(samp, addr, x1, y1, z0);
1611
1612 tx10 = get_texel_3d(samp, addr, x0, y0, z1);
1613 tx11 = get_texel_3d(samp, addr, x1, y0, z1);
1614 tx12 = get_texel_3d(samp, addr, x0, y1, z1);
1615 tx13 = get_texel_3d(samp, addr, x1, y1, z1);
1616
1617 /* interpolate R, G, B, A */
1618 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1619 rgba[TGSI_NUM_CHANNELS*c] = lerp_3d(xw, yw, zw,
1620 tx00[c], tx01[c],
1621 tx02[c], tx03[c],
1622 tx10[c], tx11[c],
1623 tx12[c], tx13[c]);
1624 }
1625
1626
1627 /* Calculate level of detail for every fragment,
1628 * with lambda already computed.
1629 * Note that lambda has already been biased by global LOD bias.
1630 * \param biased_lambda per-quad lambda.
1631 * \param lod_in per-fragment lod_bias or explicit_lod.
1632 * \param lod returns the per-fragment lod.
1633 */
1634 static INLINE void
1635 compute_lod(const struct pipe_sampler_state *sampler,
1636 enum tgsi_sampler_control control,
1637 const float biased_lambda,
1638 const float lod_in[TGSI_QUAD_SIZE],
1639 float lod[TGSI_QUAD_SIZE])
1640 {
1641 float min_lod = sampler->min_lod;
1642 float max_lod = sampler->max_lod;
1643 uint i;
1644
1645 switch (control) {
1646 case tgsi_sampler_lod_none:
1647 case tgsi_sampler_lod_zero:
1648 /* XXX FIXME */
1649 case tgsi_sampler_derivs_explicit:
1650 lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(biased_lambda, min_lod, max_lod);
1651 break;
1652 case tgsi_sampler_lod_bias:
1653 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1654 lod[i] = biased_lambda + lod_in[i];
1655 lod[i] = CLAMP(lod[i], min_lod, max_lod);
1656 }
1657 break;
1658 case tgsi_sampler_lod_explicit:
1659 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1660 lod[i] = CLAMP(lod_in[i], min_lod, max_lod);
1661 }
1662 break;
1663 default:
1664 assert(0);
1665 lod[0] = lod[1] = lod[2] = lod[3] = 0.0f;
1666 }
1667 }
1668
1669
1670 /* Calculate level of detail for every fragment.
1671 * \param lod_in per-fragment lod_bias or explicit_lod.
1672 * \param lod results per-fragment lod.
1673 */
1674 static INLINE void
1675 compute_lambda_lod(struct sp_sampler_variant *samp,
1676 const float s[TGSI_QUAD_SIZE],
1677 const float t[TGSI_QUAD_SIZE],
1678 const float p[TGSI_QUAD_SIZE],
1679 const float lod_in[TGSI_QUAD_SIZE],
1680 enum tgsi_sampler_control control,
1681 float lod[TGSI_QUAD_SIZE])
1682 {
1683 const struct pipe_sampler_state *sampler = samp->sampler;
1684 float lod_bias = sampler->lod_bias;
1685 float min_lod = sampler->min_lod;
1686 float max_lod = sampler->max_lod;
1687 float lambda;
1688 uint i;
1689
1690 switch (control) {
1691 case tgsi_sampler_lod_none:
1692 /* XXX FIXME */
1693 case tgsi_sampler_derivs_explicit:
1694 lambda = samp->compute_lambda(samp, s, t, p) + lod_bias;
1695 lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lambda, min_lod, max_lod);
1696 break;
1697 case tgsi_sampler_lod_bias:
1698 lambda = samp->compute_lambda(samp, s, t, p) + lod_bias;
1699 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1700 lod[i] = lambda + lod_in[i];
1701 lod[i] = CLAMP(lod[i], min_lod, max_lod);
1702 }
1703 break;
1704 case tgsi_sampler_lod_explicit:
1705 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1706 lod[i] = CLAMP(lod_in[i], min_lod, max_lod);
1707 }
1708 break;
1709 case tgsi_sampler_lod_zero:
1710 /* this is all static state in the sampler really need clamp here? */
1711 lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lod_bias, min_lod, max_lod);
1712 break;
1713 default:
1714 assert(0);
1715 lod[0] = lod[1] = lod[2] = lod[3] = 0.0f;
1716 }
1717 }
1718
1719
1720 static void
1721 mip_filter_linear(struct sp_sampler_variant *samp,
1722 const float s[TGSI_QUAD_SIZE],
1723 const float t[TGSI_QUAD_SIZE],
1724 const float p[TGSI_QUAD_SIZE],
1725 const float c0[TGSI_QUAD_SIZE],
1726 const float lod_in[TGSI_QUAD_SIZE],
1727 enum tgsi_sampler_control control,
1728 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1729 {
1730 const struct pipe_resource *texture = samp->view->texture;
1731 int j;
1732 float lod[TGSI_QUAD_SIZE];
1733
1734 compute_lambda_lod(samp, s, t, p, lod_in, control, lod);
1735
1736 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1737 int level0 = samp->view->u.tex.first_level + (int)lod[j];
1738
1739 if (lod[j] < 0.0)
1740 samp->mag_img_filter(samp, s[j], t[j], p[j], samp->view->u.tex.first_level,
1741 samp->faces[j], &rgba[0][j]);
1742
1743 else if (level0 >= texture->last_level)
1744 samp->min_img_filter(samp, s[j], t[j], p[j], texture->last_level,
1745 samp->faces[j], &rgba[0][j]);
1746
1747 else {
1748 float levelBlend = frac(lod[j]);
1749 float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
1750 int c;
1751
1752 samp->min_img_filter(samp, s[j], t[j], p[j], level0,
1753 samp->faces[j], &rgbax[0][0]);
1754 samp->min_img_filter(samp, s[j], t[j], p[j], level0+1,
1755 samp->faces[j], &rgbax[0][1]);
1756
1757 for (c = 0; c < 4; c++) {
1758 rgba[c][j] = lerp(levelBlend, rgbax[c][0], rgbax[c][1]);
1759 }
1760 }
1761 }
1762
1763 if (DEBUG_TEX) {
1764 print_sample_4(__FUNCTION__, rgba);
1765 }
1766 }
1767
1768
1769 /**
1770 * Compute nearest mipmap level from texcoords.
1771 * Then sample the texture level for four elements of a quad.
1772 * \param c0 the LOD bias factors, or absolute LODs (depending on control)
1773 */
1774 static void
1775 mip_filter_nearest(struct sp_sampler_variant *samp,
1776 const float s[TGSI_QUAD_SIZE],
1777 const float t[TGSI_QUAD_SIZE],
1778 const float p[TGSI_QUAD_SIZE],
1779 const float c0[TGSI_QUAD_SIZE],
1780 const float lod_in[TGSI_QUAD_SIZE],
1781 enum tgsi_sampler_control control,
1782 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1783 {
1784 const struct pipe_resource *texture = samp->view->texture;
1785 float lod[TGSI_QUAD_SIZE];
1786 int j;
1787
1788 compute_lambda_lod(samp, s, t, p, lod_in, control, lod);
1789
1790 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1791 if (lod[j] < 0.0)
1792 samp->mag_img_filter(samp, s[j], t[j], p[j], samp->view->u.tex.first_level,
1793 samp->faces[j], &rgba[0][j]);
1794 else {
1795 float level = samp->view->u.tex.first_level + (int)(lod[j] + 0.5F) ;
1796 level = MIN2(level, (int)texture->last_level);
1797 samp->min_img_filter(samp, s[j], t[j], p[j], level, samp->faces[j],
1798 &rgba[0][j]);
1799 }
1800 }
1801
1802 if (DEBUG_TEX) {
1803 print_sample_4(__FUNCTION__, rgba);
1804 }
1805 }
1806
1807
1808 static void
1809 mip_filter_none(struct sp_sampler_variant *samp,
1810 const float s[TGSI_QUAD_SIZE],
1811 const float t[TGSI_QUAD_SIZE],
1812 const float p[TGSI_QUAD_SIZE],
1813 const float c0[TGSI_QUAD_SIZE],
1814 const float lod_in[TGSI_QUAD_SIZE],
1815 enum tgsi_sampler_control control,
1816 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1817 {
1818 float lod[TGSI_QUAD_SIZE];
1819 int j;
1820
1821 compute_lambda_lod(samp, s, t, p, lod_in, control, lod);
1822
1823 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1824 if (lod[j] < 0.0) {
1825 samp->mag_img_filter(samp, s[j], t[j], p[j], samp->view->u.tex.first_level,
1826 samp->faces[j], &rgba[0][j]);
1827 }
1828 else {
1829 samp->min_img_filter(samp, s[j], t[j], p[j], samp->view->u.tex.first_level,
1830 samp->faces[j], &rgba[0][j]);
1831 }
1832 }
1833 }
1834
1835
1836 static void
1837 mip_filter_none_no_filter_select(struct sp_sampler_variant *samp,
1838 const float s[TGSI_QUAD_SIZE],
1839 const float t[TGSI_QUAD_SIZE],
1840 const float p[TGSI_QUAD_SIZE],
1841 const float c0[TGSI_QUAD_SIZE],
1842 const float lod_in[TGSI_QUAD_SIZE],
1843 enum tgsi_sampler_control control,
1844 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1845 {
1846 int j;
1847
1848 for (j = 0; j < TGSI_QUAD_SIZE; j++)
1849 samp->mag_img_filter(samp, s[j], t[j], p[j], samp->view->u.tex.first_level,
1850 samp->faces[j], &rgba[0][j]);
1851 }
1852
1853
/* For anisotropic filtering */
#define WEIGHT_LUT_SIZE 1024

static float *weightLut = NULL;

/**
 * Creates the look-up table used to speed-up EWA sampling.
 *
 * Entry i holds exp(-alpha * r2) with alpha = 2 and
 * r2 = i / (WEIGHT_LUT_SIZE - 1).  The table is built at most once; calls
 * made after it exists return immediately.
 *
 * If the allocation fails (assuming MALLOC reports failure by returning
 * NULL, like malloc), nothing is written and weightLut stays NULL so a
 * later call can retry.
 */
static void
create_filter_table(void)
{
   const float alpha = 2;
   float *table;
   unsigned i;

   if (weightLut)
      return;

   table = (float *) MALLOC(WEIGHT_LUT_SIZE * sizeof(float));
   if (!table)
      return;

   for (i = 0; i < WEIGHT_LUT_SIZE; ++i) {
      const float r2 = (float) i / (float) (WEIGHT_LUT_SIZE - 1);
      table[i] = (float) exp(-alpha * r2);
   }

   /* Publish the table only once it is completely filled in. */
   weightLut = table;
}
1877
1878
1879 /**
1880 * Elliptical weighted average (EWA) filter for producing high quality
1881 * anisotropic filtered results.
1882 * Based on the Higher Quality Elliptical Weighted Average Filter
1883 * published by Paul S. Heckbert in his Master's Thesis
1884 * "Fundamentals of Texture Mapping and Image Warping" (1989)
1885 */
1886 static void
1887 img_filter_2d_ewa(struct sp_sampler_variant *samp,
1888 const float s[TGSI_QUAD_SIZE],
1889 const float t[TGSI_QUAD_SIZE],
1890 const float p[TGSI_QUAD_SIZE],
1891 unsigned level,
1892 const float dudx, const float dvdx,
1893 const float dudy, const float dvdy,
1894 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1895 {
1896 const struct pipe_resource *texture = samp->view->texture;
1897
1898 // ??? Won't the image filters blow up if level is negative?
1899 unsigned level0 = level > 0 ? level : 0;
1900 float scaling = 1.0 / (1 << level0);
1901 int width = u_minify(texture->width0, level0);
1902 int height = u_minify(texture->height0, level0);
1903
1904 float ux = dudx * scaling;
1905 float vx = dvdx * scaling;
1906 float uy = dudy * scaling;
1907 float vy = dvdy * scaling;
1908
1909 /* compute ellipse coefficients to bound the region:
1910 * A*x*x + B*x*y + C*y*y = F.
1911 */
1912 float A = vx*vx+vy*vy+1;
1913 float B = -2*(ux*vx+uy*vy);
1914 float C = ux*ux+uy*uy+1;
1915 float F = A*C-B*B/4.0;
1916
1917 /* check if it is an ellipse */
1918 /* ASSERT(F > 0.0); */
1919
1920 /* Compute the ellipse's (u,v) bounding box in texture space */
1921 float d = -B*B+4.0*C*A;
1922 float box_u = 2.0 / d * sqrt(d*C*F); /* box_u -> half of bbox with */
1923 float box_v = 2.0 / d * sqrt(A*d*F); /* box_v -> half of bbox height */
1924
1925 float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
1926 float s_buffer[TGSI_QUAD_SIZE];
1927 float t_buffer[TGSI_QUAD_SIZE];
1928 float weight_buffer[TGSI_QUAD_SIZE];
1929 unsigned buffer_next;
1930 int j;
1931 float den; /* = 0.0F; */
1932 float ddq;
1933 float U; /* = u0 - tex_u; */
1934 int v;
1935
1936 /* Scale ellipse formula to directly index the Filter Lookup Table.
1937 * i.e. scale so that F = WEIGHT_LUT_SIZE-1
1938 */
1939 double formScale = (double) (WEIGHT_LUT_SIZE - 1) / F;
1940 A *= formScale;
1941 B *= formScale;
1942 C *= formScale;
1943 /* F *= formScale; */ /* no need to scale F as we don't use it below here */
1944
1945 /* For each quad, the du and dx values are the same and so the ellipse is
1946 * also the same. Note that texel/image access can only be performed using
1947 * a quad, i.e. it is not possible to get the pixel value for a single
1948 * tex coord. In order to have a better performance, the access is buffered
1949 * using the s_buffer/t_buffer and weight_buffer. Only when the buffer is
1950 * full, then the pixel values are read from the image.
1951 */
1952 ddq = 2 * A;
1953
1954 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1955 /* Heckbert MS thesis, p. 59; scan over the bounding box of the ellipse
1956 * and incrementally update the value of Ax^2+Bxy*Cy^2; when this
1957 * value, q, is less than F, we're inside the ellipse
1958 */
1959 float tex_u = -0.5F + s[j] * texture->width0 * scaling;
1960 float tex_v = -0.5F + t[j] * texture->height0 * scaling;
1961
1962 int u0 = (int) floorf(tex_u - box_u);
1963 int u1 = (int) ceilf(tex_u + box_u);
1964 int v0 = (int) floorf(tex_v - box_v);
1965 int v1 = (int) ceilf(tex_v + box_v);
1966
1967 float num[4] = {0.0F, 0.0F, 0.0F, 0.0F};
1968 buffer_next = 0;
1969 den = 0;
1970 U = u0 - tex_u;
1971 for (v = v0; v <= v1; ++v) {
1972 float V = v - tex_v;
1973 float dq = A * (2 * U + 1) + B * V;
1974 float q = (C * V + B * U) * V + A * U * U;
1975
1976 int u;
1977 for (u = u0; u <= u1; ++u) {
1978 /* Note that the ellipse has been pre-scaled so F =
1979 * WEIGHT_LUT_SIZE - 1
1980 */
1981 if (q < WEIGHT_LUT_SIZE) {
1982 /* as a LUT is used, q must never be negative;
1983 * should not happen, though
1984 */
1985 const int qClamped = q >= 0.0F ? q : 0;
1986 float weight = weightLut[qClamped];
1987
1988 weight_buffer[buffer_next] = weight;
1989 s_buffer[buffer_next] = u / ((float) width);
1990 t_buffer[buffer_next] = v / ((float) height);
1991
1992 buffer_next++;
1993 if (buffer_next == TGSI_QUAD_SIZE) {
1994 /* 4 texel coords are in the buffer -> read it now */
1995 unsigned jj;
1996 /* it is assumed that samp->min_img_filter is set to
1997 * img_filter_2d_nearest or one of the
1998 * accelerated img_filter_2d_nearest_XXX functions.
1999 */
2000 for (jj = 0; jj < buffer_next; jj++) {
2001 samp->min_img_filter(samp, s_buffer[jj], t_buffer[jj], p[jj],
2002 level, samp->faces[j], &rgba_temp[0][jj]);
2003 num[0] += weight_buffer[jj] * rgba_temp[0][jj];
2004 num[1] += weight_buffer[jj] * rgba_temp[1][jj];
2005 num[2] += weight_buffer[jj] * rgba_temp[2][jj];
2006 num[3] += weight_buffer[jj] * rgba_temp[3][jj];
2007 }
2008
2009 buffer_next = 0;
2010 }
2011
2012 den += weight;
2013 }
2014 q += dq;
2015 dq += ddq;
2016 }
2017 }
2018
2019 /* if the tex coord buffer contains unread values, we will read
2020 * them now.
2021 */
2022 if (buffer_next > 0) {
2023 unsigned jj;
2024 /* it is assumed that samp->min_img_filter is set to
2025 * img_filter_2d_nearest or one of the
2026 * accelerated img_filter_2d_nearest_XXX functions.
2027 */
2028 for (jj = 0; jj < buffer_next; jj++) {
2029 samp->min_img_filter(samp, s_buffer[jj], t_buffer[jj], p[jj], level,
2030 samp->faces[j], &rgba_temp[0][jj]);
2031 num[0] += weight_buffer[jj] * rgba_temp[0][jj];
2032 num[1] += weight_buffer[jj] * rgba_temp[1][jj];
2033 num[2] += weight_buffer[jj] * rgba_temp[2][jj];
2034 num[3] += weight_buffer[jj] * rgba_temp[3][jj];
2035 }
2036 }
2037
2038 if (den <= 0.0F) {
2039 /* Reaching this place would mean that no pixels intersected
2040 * the ellipse. This should never happen because the filter
2041 * we use always intersects at least one pixel.
2042 */
2043
2044 /*rgba[0]=0;
2045 rgba[1]=0;
2046 rgba[2]=0;
2047 rgba[3]=0;*/
2048 /* not enough pixels in resampling, resort to direct interpolation */
2049 samp->min_img_filter(samp, s[j], t[j], p[j], level, samp->faces[j],
2050 &rgba_temp[0][j]);
2051 den = 1;
2052 num[0] = rgba_temp[0][j];
2053 num[1] = rgba_temp[1][j];
2054 num[2] = rgba_temp[2][j];
2055 num[3] = rgba_temp[3][j];
2056 }
2057
2058 rgba[0][j] = num[0] / den;
2059 rgba[1][j] = num[1] / den;
2060 rgba[2][j] = num[2] / den;
2061 rgba[3][j] = num[3] / den;
2062 }
2063 }
2064
2065
2066 /**
2067 * Sample 2D texture using an anisotropic filter.
2068 */
2069 static void
2070 mip_filter_linear_aniso(struct sp_sampler_variant *samp,
2071 const float s[TGSI_QUAD_SIZE],
2072 const float t[TGSI_QUAD_SIZE],
2073 const float p[TGSI_QUAD_SIZE],
2074 const float c0[TGSI_QUAD_SIZE],
2075 const float lod_in[TGSI_QUAD_SIZE],
2076 enum tgsi_sampler_control control,
2077 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2078 {
2079 const struct pipe_resource *texture = samp->view->texture;
2080 int level0;
2081 float lambda;
2082 float lod[TGSI_QUAD_SIZE];
2083
2084 float s_to_u = u_minify(texture->width0, samp->view->u.tex.first_level);
2085 float t_to_v = u_minify(texture->height0, samp->view->u.tex.first_level);
2086 float dudx = (s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]) * s_to_u;
2087 float dudy = (s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]) * s_to_u;
2088 float dvdx = (t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]) * t_to_v;
2089 float dvdy = (t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]) * t_to_v;
2090
2091 if (control == tgsi_sampler_lod_bias ||
2092 control == tgsi_sampler_lod_none ||
2093 /* XXX FIXME */
2094 control == tgsi_sampler_derivs_explicit) {
2095 /* note: instead of working with Px and Py, we will use the
2096 * squared length instead, to avoid sqrt.
2097 */
2098 float Px2 = dudx * dudx + dvdx * dvdx;
2099 float Py2 = dudy * dudy + dvdy * dvdy;
2100
2101 float Pmax2;
2102 float Pmin2;
2103 float e;
2104 const float maxEccentricity = samp->sampler->max_anisotropy * samp->sampler->max_anisotropy;
2105
2106 if (Px2 < Py2) {
2107 Pmax2 = Py2;
2108 Pmin2 = Px2;
2109 }
2110 else {
2111 Pmax2 = Px2;
2112 Pmin2 = Py2;
2113 }
2114
2115 /* if the eccentricity of the ellipse is too big, scale up the shorter
2116 * of the two vectors to limit the maximum amount of work per pixel
2117 */
2118 e = Pmax2 / Pmin2;
2119 if (e > maxEccentricity) {
2120 /* float s=e / maxEccentricity;
2121 minor[0] *= s;
2122 minor[1] *= s;
2123 Pmin2 *= s; */
2124 Pmin2 = Pmax2 / maxEccentricity;
2125 }
2126
2127 /* note: we need to have Pmin=sqrt(Pmin2) here, but we can avoid
2128 * this since 0.5*log(x) = log(sqrt(x))
2129 */
2130 lambda = 0.5F * util_fast_log2(Pmin2) + samp->sampler->lod_bias;
2131 compute_lod(samp->sampler, control, lambda, lod_in, lod);
2132 }
2133 else {
2134 assert(control == tgsi_sampler_lod_explicit ||
2135 control == tgsi_sampler_lod_zero);
2136 compute_lod(samp->sampler, control, samp->sampler->lod_bias, lod_in, lod);
2137 }
2138
2139 /* XXX: Take into account all lod values.
2140 */
2141 lambda = lod[0];
2142 level0 = samp->view->u.tex.first_level + (int)lambda;
2143
2144 /* If the ellipse covers the whole image, we can
2145 * simply return the average of the whole image.
2146 */
2147 if (level0 >= (int) texture->last_level) {
2148 int j;
2149 for (j = 0; j < TGSI_QUAD_SIZE; j++)
2150 samp->min_img_filter(samp, s[j], t[j], p[j], texture->last_level,
2151 samp->faces[j], &rgba[0][j]);
2152 }
2153 else {
2154 /* don't bother interpolating between multiple LODs; it doesn't
2155 * seem to be worth the extra running time.
2156 */
2157 img_filter_2d_ewa(samp, s, t, p, level0,
2158 dudx, dvdx, dudy, dvdy, rgba);
2159 }
2160
2161 if (DEBUG_TEX) {
2162 print_sample_4(__FUNCTION__, rgba);
2163 }
2164 }
2165
2166
2167 /**
2168 * Specialized version of mip_filter_linear with hard-wired calls to
2169 * 2d lambda calculation and 2d_linear_repeat_POT img filters.
2170 */
2171 static void
2172 mip_filter_linear_2d_linear_repeat_POT(
2173 struct sp_sampler_variant *samp,
2174 const float s[TGSI_QUAD_SIZE],
2175 const float t[TGSI_QUAD_SIZE],
2176 const float p[TGSI_QUAD_SIZE],
2177 const float c0[TGSI_QUAD_SIZE],
2178 const float lod_in[TGSI_QUAD_SIZE],
2179 enum tgsi_sampler_control control,
2180 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2181 {
2182 const struct pipe_resource *texture = samp->view->texture;
2183 int j;
2184 float lod[TGSI_QUAD_SIZE];
2185
2186 compute_lambda_lod(samp, s, t, p, lod_in, control, lod);
2187
2188 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2189 int level0 = samp->view->u.tex.first_level + (int)lod[j];
2190
2191 /* Catches both negative and large values of level0:
2192 */
2193 if ((unsigned)level0 >= texture->last_level) {
2194 if (level0 < 0)
2195 img_filter_2d_linear_repeat_POT(samp, s[j], t[j], p[j],
2196 samp->view->u.tex.first_level,
2197 samp->faces[j], &rgba[0][j]);
2198 else
2199 img_filter_2d_linear_repeat_POT(samp, s[j], t[j], p[j],
2200 samp->view->texture->last_level,
2201 samp->faces[j], &rgba[0][j]);
2202
2203 }
2204 else {
2205 float levelBlend = frac(lod[j]);
2206 float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2207 int c;
2208
2209 img_filter_2d_linear_repeat_POT(samp, s[j], t[j], p[j], level0,
2210 samp->faces[j], &rgbax[0][0]);
2211 img_filter_2d_linear_repeat_POT(samp, s[j], t[j], p[j], level0+1,
2212 samp->faces[j], &rgbax[0][1]);
2213
2214 for (c = 0; c < TGSI_NUM_CHANNELS; c++)
2215 rgba[c][j] = lerp(levelBlend, rgbax[c][0], rgbax[c][1]);
2216 }
2217 }
2218
2219 if (DEBUG_TEX) {
2220 print_sample_4(__FUNCTION__, rgba);
2221 }
2222 }
2223
2224
2225 /**
2226 * Do shadow/depth comparisons.
2227 */
2228 static void
2229 sample_compare(struct sp_sampler_variant *samp,
2230 const float s[TGSI_QUAD_SIZE],
2231 const float t[TGSI_QUAD_SIZE],
2232 const float p[TGSI_QUAD_SIZE],
2233 const float c0[TGSI_QUAD_SIZE],
2234 const float c1[TGSI_QUAD_SIZE],
2235 enum tgsi_sampler_control control,
2236 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2237 {
2238 const struct pipe_sampler_state *sampler = samp->sampler;
2239 int j, k0, k1, k2, k3;
2240 float val;
2241 float pc0, pc1, pc2, pc3;
2242
2243 samp->mip_filter(samp, s, t, p, c0, c1, control, rgba);
2244
2245 /**
2246 * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
2247 * for 2D Array texture we need to use the 'c0' (aka Q).
2248 * When we sampled the depth texture, the depth value was put into all
2249 * RGBA channels. We look at the red channel here.
2250 */
2251
2252 if (samp->view->texture->target == PIPE_TEXTURE_2D_ARRAY ||
2253 samp->view->texture->target == PIPE_TEXTURE_CUBE) {
2254 pc0 = CLAMP(c0[0], 0.0F, 1.0F);
2255 pc1 = CLAMP(c0[1], 0.0F, 1.0F);
2256 pc2 = CLAMP(c0[2], 0.0F, 1.0F);
2257 pc3 = CLAMP(c0[3], 0.0F, 1.0F);
2258 } else if (samp->view->texture->target == PIPE_TEXTURE_CUBE_ARRAY) {
2259 pc0 = CLAMP(c1[0], 0.0F, 1.0F);
2260 pc1 = CLAMP(c1[1], 0.0F, 1.0F);
2261 pc2 = CLAMP(c1[2], 0.0F, 1.0F);
2262 pc3 = CLAMP(c1[3], 0.0F, 1.0F);
2263 } else {
2264 pc0 = CLAMP(p[0], 0.0F, 1.0F);
2265 pc1 = CLAMP(p[1], 0.0F, 1.0F);
2266 pc2 = CLAMP(p[2], 0.0F, 1.0F);
2267 pc3 = CLAMP(p[3], 0.0F, 1.0F);
2268 }
2269 /* compare four texcoords vs. four texture samples */
2270 switch (sampler->compare_func) {
2271 case PIPE_FUNC_LESS:
2272 k0 = pc0 < rgba[0][0];
2273 k1 = pc1 < rgba[0][1];
2274 k2 = pc2 < rgba[0][2];
2275 k3 = pc3 < rgba[0][3];
2276 break;
2277 case PIPE_FUNC_LEQUAL:
2278 k0 = pc0 <= rgba[0][0];
2279 k1 = pc1 <= rgba[0][1];
2280 k2 = pc2 <= rgba[0][2];
2281 k3 = pc3 <= rgba[0][3];
2282 break;
2283 case PIPE_FUNC_GREATER:
2284 k0 = pc0 > rgba[0][0];
2285 k1 = pc1 > rgba[0][1];
2286 k2 = pc2 > rgba[0][2];
2287 k3 = pc3 > rgba[0][3];
2288 break;
2289 case PIPE_FUNC_GEQUAL:
2290 k0 = pc0 >= rgba[0][0];
2291 k1 = pc1 >= rgba[0][1];
2292 k2 = pc2 >= rgba[0][2];
2293 k3 = pc3 >= rgba[0][3];
2294 break;
2295 case PIPE_FUNC_EQUAL:
2296 k0 = pc0 == rgba[0][0];
2297 k1 = pc1 == rgba[0][1];
2298 k2 = pc2 == rgba[0][2];
2299 k3 = pc3 == rgba[0][3];
2300 break;
2301 case PIPE_FUNC_NOTEQUAL:
2302 k0 = pc0 != rgba[0][0];
2303 k1 = pc1 != rgba[0][1];
2304 k2 = pc2 != rgba[0][2];
2305 k3 = pc3 != rgba[0][3];
2306 break;
2307 case PIPE_FUNC_ALWAYS:
2308 k0 = k1 = k2 = k3 = 1;
2309 break;
2310 case PIPE_FUNC_NEVER:
2311 k0 = k1 = k2 = k3 = 0;
2312 break;
2313 default:
2314 k0 = k1 = k2 = k3 = 0;
2315 assert(0);
2316 break;
2317 }
2318
2319 if (sampler->mag_img_filter == PIPE_TEX_FILTER_LINEAR) {
2320 /* convert four pass/fail values to an intensity in [0,1] */
2321 val = 0.25F * (k0 + k1 + k2 + k3);
2322
2323 /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
2324 for (j = 0; j < 4; j++) {
2325 rgba[0][j] = rgba[1][j] = rgba[2][j] = val;
2326 rgba[3][j] = 1.0F;
2327 }
2328 } else {
2329 for (j = 0; j < 4; j++) {
2330 rgba[0][j] = k0;
2331 rgba[1][j] = k1;
2332 rgba[2][j] = k2;
2333 rgba[3][j] = 1.0F;
2334 }
2335 }
2336 }
2337
2338
2339 /**
2340 * Use 3D texcoords to choose a cube face, then sample the 2D cube faces.
2341 * Put face info into the sampler faces[] array.
2342 */
2343 static void
2344 sample_cube(struct sp_sampler_variant *samp,
2345 const float s[TGSI_QUAD_SIZE],
2346 const float t[TGSI_QUAD_SIZE],
2347 const float p[TGSI_QUAD_SIZE],
2348 const float c0[TGSI_QUAD_SIZE],
2349 const float c1[TGSI_QUAD_SIZE],
2350 enum tgsi_sampler_control control,
2351 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2352 {
2353 unsigned j;
2354 float ssss[4], tttt[4];
2355
2356 /* Not actually used, but the intermediate steps that do the
2357 * dereferencing don't know it.
2358 */
2359 static float pppp[4] = { 0, 0, 0, 0 };
2360
2361 pppp[0] = c0[0];
2362 pppp[1] = c0[1];
2363 pppp[2] = c0[2];
2364 pppp[3] = c0[3];
2365 /*
2366 major axis
2367 direction target sc tc ma
2368 ---------- ------------------------------- --- --- ---
2369 +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx
2370 -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx
2371 +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry
2372 -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry
2373 +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz
2374 -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz
2375 */
2376
2377 /* Choose the cube face and compute new s/t coords for the 2D face.
2378 *
2379 * Use the same cube face for all four pixels in the quad.
2380 *
2381 * This isn't ideal, but if we want to use a different cube face
2382 * per pixel in the quad, we'd have to also compute the per-face
2383 * LOD here too. That's because the four post-face-selection
2384 * texcoords are no longer related to each other (they're
2385 * per-face!) so we can't use subtraction to compute the partial
2386 * deriviates to compute the LOD. Doing so (near cube edges
2387 * anyway) gives us pretty much random values.
2388 */
2389 {
2390 /* use the average of the four pixel's texcoords to choose the face */
2391 const float rx = 0.25F * (s[0] + s[1] + s[2] + s[3]);
2392 const float ry = 0.25F * (t[0] + t[1] + t[2] + t[3]);
2393 const float rz = 0.25F * (p[0] + p[1] + p[2] + p[3]);
2394 const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
2395
2396 if (arx >= ary && arx >= arz) {
2397 float sign = (rx >= 0.0F) ? 1.0F : -1.0F;
2398 uint face = (rx >= 0.0F) ? PIPE_TEX_FACE_POS_X : PIPE_TEX_FACE_NEG_X;
2399 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2400 const float ima = -0.5F / fabsf(s[j]);
2401 ssss[j] = sign * p[j] * ima + 0.5F;
2402 tttt[j] = t[j] * ima + 0.5F;
2403 samp->faces[j] = face;
2404 }
2405 }
2406 else if (ary >= arx && ary >= arz) {
2407 float sign = (ry >= 0.0F) ? 1.0F : -1.0F;
2408 uint face = (ry >= 0.0F) ? PIPE_TEX_FACE_POS_Y : PIPE_TEX_FACE_NEG_Y;
2409 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2410 const float ima = -0.5F / fabsf(t[j]);
2411 ssss[j] = -s[j] * ima + 0.5F;
2412 tttt[j] = sign * -p[j] * ima + 0.5F;
2413 samp->faces[j] = face;
2414 }
2415 }
2416 else {
2417 float sign = (rz >= 0.0F) ? 1.0F : -1.0F;
2418 uint face = (rz >= 0.0F) ? PIPE_TEX_FACE_POS_Z : PIPE_TEX_FACE_NEG_Z;
2419 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2420 const float ima = -0.5F / fabsf(p[j]);
2421 ssss[j] = sign * -s[j] * ima + 0.5F;
2422 tttt[j] = t[j] * ima + 0.5F;
2423 samp->faces[j] = face;
2424 }
2425 }
2426 }
2427
2428 /* In our little pipeline, the compare stage is next. If compare
2429 * is not active, this will point somewhere deeper into the
2430 * pipeline, eg. to mip_filter or even img_filter.
2431 */
2432 samp->compare(samp, ssss, tttt, pppp, c0, c1, control, rgba);
2433 }
2434
2435
2436 static void
2437 do_swizzling(const struct sp_sampler_variant *samp,
2438 float in[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
2439 float out[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2440 {
2441 int j;
2442 const unsigned swizzle_r = samp->key.bits.swizzle_r;
2443 const unsigned swizzle_g = samp->key.bits.swizzle_g;
2444 const unsigned swizzle_b = samp->key.bits.swizzle_b;
2445 const unsigned swizzle_a = samp->key.bits.swizzle_a;
2446
2447 switch (swizzle_r) {
2448 case PIPE_SWIZZLE_ZERO:
2449 for (j = 0; j < 4; j++)
2450 out[0][j] = 0.0f;
2451 break;
2452 case PIPE_SWIZZLE_ONE:
2453 for (j = 0; j < 4; j++)
2454 out[0][j] = 1.0f;
2455 break;
2456 default:
2457 assert(swizzle_r < 4);
2458 for (j = 0; j < 4; j++)
2459 out[0][j] = in[swizzle_r][j];
2460 }
2461
2462 switch (swizzle_g) {
2463 case PIPE_SWIZZLE_ZERO:
2464 for (j = 0; j < 4; j++)
2465 out[1][j] = 0.0f;
2466 break;
2467 case PIPE_SWIZZLE_ONE:
2468 for (j = 0; j < 4; j++)
2469 out[1][j] = 1.0f;
2470 break;
2471 default:
2472 assert(swizzle_g < 4);
2473 for (j = 0; j < 4; j++)
2474 out[1][j] = in[swizzle_g][j];
2475 }
2476
2477 switch (swizzle_b) {
2478 case PIPE_SWIZZLE_ZERO:
2479 for (j = 0; j < 4; j++)
2480 out[2][j] = 0.0f;
2481 break;
2482 case PIPE_SWIZZLE_ONE:
2483 for (j = 0; j < 4; j++)
2484 out[2][j] = 1.0f;
2485 break;
2486 default:
2487 assert(swizzle_b < 4);
2488 for (j = 0; j < 4; j++)
2489 out[2][j] = in[swizzle_b][j];
2490 }
2491
2492 switch (swizzle_a) {
2493 case PIPE_SWIZZLE_ZERO:
2494 for (j = 0; j < 4; j++)
2495 out[3][j] = 0.0f;
2496 break;
2497 case PIPE_SWIZZLE_ONE:
2498 for (j = 0; j < 4; j++)
2499 out[3][j] = 1.0f;
2500 break;
2501 default:
2502 assert(swizzle_a < 4);
2503 for (j = 0; j < 4; j++)
2504 out[3][j] = in[swizzle_a][j];
2505 }
2506 }
2507
2508
2509 static void
2510 sample_swizzle(struct sp_sampler_variant *samp,
2511 const float s[TGSI_QUAD_SIZE],
2512 const float t[TGSI_QUAD_SIZE],
2513 const float p[TGSI_QUAD_SIZE],
2514 const float c0[TGSI_QUAD_SIZE],
2515 const float c1[TGSI_QUAD_SIZE],
2516 enum tgsi_sampler_control control,
2517 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2518 {
2519 float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2520
2521 samp->sample_target(samp, s, t, p, c0, c1, control, rgba_temp);
2522
2523 do_swizzling(samp, rgba_temp, rgba);
2524 }
2525
2526
2527 static wrap_nearest_func
2528 get_nearest_unorm_wrap(unsigned mode)
2529 {
2530 switch (mode) {
2531 case PIPE_TEX_WRAP_CLAMP:
2532 return wrap_nearest_unorm_clamp;
2533 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2534 return wrap_nearest_unorm_clamp_to_edge;
2535 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2536 return wrap_nearest_unorm_clamp_to_border;
2537 default:
2538 assert(0);
2539 return wrap_nearest_unorm_clamp;
2540 }
2541 }
2542
2543
2544 static wrap_nearest_func
2545 get_nearest_wrap(unsigned mode)
2546 {
2547 switch (mode) {
2548 case PIPE_TEX_WRAP_REPEAT:
2549 return wrap_nearest_repeat;
2550 case PIPE_TEX_WRAP_CLAMP:
2551 return wrap_nearest_clamp;
2552 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2553 return wrap_nearest_clamp_to_edge;
2554 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2555 return wrap_nearest_clamp_to_border;
2556 case PIPE_TEX_WRAP_MIRROR_REPEAT:
2557 return wrap_nearest_mirror_repeat;
2558 case PIPE_TEX_WRAP_MIRROR_CLAMP:
2559 return wrap_nearest_mirror_clamp;
2560 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
2561 return wrap_nearest_mirror_clamp_to_edge;
2562 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
2563 return wrap_nearest_mirror_clamp_to_border;
2564 default:
2565 assert(0);
2566 return wrap_nearest_repeat;
2567 }
2568 }
2569
2570
2571 static wrap_linear_func
2572 get_linear_unorm_wrap(unsigned mode)
2573 {
2574 switch (mode) {
2575 case PIPE_TEX_WRAP_CLAMP:
2576 return wrap_linear_unorm_clamp;
2577 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2578 return wrap_linear_unorm_clamp_to_edge;
2579 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2580 return wrap_linear_unorm_clamp_to_border;
2581 default:
2582 assert(0);
2583 return wrap_linear_unorm_clamp;
2584 }
2585 }
2586
2587
2588 static wrap_linear_func
2589 get_linear_wrap(unsigned mode)
2590 {
2591 switch (mode) {
2592 case PIPE_TEX_WRAP_REPEAT:
2593 return wrap_linear_repeat;
2594 case PIPE_TEX_WRAP_CLAMP:
2595 return wrap_linear_clamp;
2596 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2597 return wrap_linear_clamp_to_edge;
2598 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2599 return wrap_linear_clamp_to_border;
2600 case PIPE_TEX_WRAP_MIRROR_REPEAT:
2601 return wrap_linear_mirror_repeat;
2602 case PIPE_TEX_WRAP_MIRROR_CLAMP:
2603 return wrap_linear_mirror_clamp;
2604 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
2605 return wrap_linear_mirror_clamp_to_edge;
2606 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
2607 return wrap_linear_mirror_clamp_to_border;
2608 default:
2609 assert(0);
2610 return wrap_linear_repeat;
2611 }
2612 }
2613
2614
2615 /**
2616 * Is swizzling needed for the given state key?
2617 */
2618 static INLINE bool
2619 any_swizzle(union sp_sampler_key key)
2620 {
2621 return (key.bits.swizzle_r != PIPE_SWIZZLE_RED ||
2622 key.bits.swizzle_g != PIPE_SWIZZLE_GREEN ||
2623 key.bits.swizzle_b != PIPE_SWIZZLE_BLUE ||
2624 key.bits.swizzle_a != PIPE_SWIZZLE_ALPHA);
2625 }
2626
2627
2628 static compute_lambda_func
2629 get_lambda_func(const union sp_sampler_key key)
2630 {
2631 if (key.bits.processor == TGSI_PROCESSOR_VERTEX)
2632 return compute_lambda_vert;
2633
2634 switch (key.bits.target) {
2635 case PIPE_BUFFER:
2636 case PIPE_TEXTURE_1D:
2637 case PIPE_TEXTURE_1D_ARRAY:
2638 return compute_lambda_1d;
2639 case PIPE_TEXTURE_2D:
2640 case PIPE_TEXTURE_2D_ARRAY:
2641 case PIPE_TEXTURE_RECT:
2642 case PIPE_TEXTURE_CUBE:
2643 case PIPE_TEXTURE_CUBE_ARRAY:
2644 return compute_lambda_2d;
2645 case PIPE_TEXTURE_3D:
2646 return compute_lambda_3d;
2647 default:
2648 assert(0);
2649 return compute_lambda_1d;
2650 }
2651 }
2652
2653
2654 static img_filter_func
2655 get_img_filter(const union sp_sampler_key key,
2656 unsigned filter,
2657 const struct pipe_sampler_state *sampler)
2658 {
2659 switch (key.bits.target) {
2660 case PIPE_BUFFER:
2661 case PIPE_TEXTURE_1D:
2662 if (filter == PIPE_TEX_FILTER_NEAREST)
2663 return img_filter_1d_nearest;
2664 else
2665 return img_filter_1d_linear;
2666 break;
2667 case PIPE_TEXTURE_1D_ARRAY:
2668 if (filter == PIPE_TEX_FILTER_NEAREST)
2669 return img_filter_1d_array_nearest;
2670 else
2671 return img_filter_1d_array_linear;
2672 break;
2673 case PIPE_TEXTURE_2D:
2674 case PIPE_TEXTURE_RECT:
2675 /* Try for fast path:
2676 */
2677 if (key.bits.is_pot &&
2678 sampler->wrap_s == sampler->wrap_t &&
2679 sampler->normalized_coords)
2680 {
2681 switch (sampler->wrap_s) {
2682 case PIPE_TEX_WRAP_REPEAT:
2683 switch (filter) {
2684 case PIPE_TEX_FILTER_NEAREST:
2685 return img_filter_2d_nearest_repeat_POT;
2686 case PIPE_TEX_FILTER_LINEAR:
2687 return img_filter_2d_linear_repeat_POT;
2688 default:
2689 break;
2690 }
2691 break;
2692 case PIPE_TEX_WRAP_CLAMP:
2693 switch (filter) {
2694 case PIPE_TEX_FILTER_NEAREST:
2695 return img_filter_2d_nearest_clamp_POT;
2696 default:
2697 break;
2698 }
2699 }
2700 }
2701 /* Otherwise use default versions:
2702 */
2703 if (filter == PIPE_TEX_FILTER_NEAREST)
2704 return img_filter_2d_nearest;
2705 else
2706 return img_filter_2d_linear;
2707 break;
2708 case PIPE_TEXTURE_2D_ARRAY:
2709 if (filter == PIPE_TEX_FILTER_NEAREST)
2710 return img_filter_2d_array_nearest;
2711 else
2712 return img_filter_2d_array_linear;
2713 break;
2714 case PIPE_TEXTURE_CUBE:
2715 if (filter == PIPE_TEX_FILTER_NEAREST)
2716 return img_filter_cube_nearest;
2717 else
2718 return img_filter_cube_linear;
2719 break;
2720 case PIPE_TEXTURE_CUBE_ARRAY:
2721 if (filter == PIPE_TEX_FILTER_NEAREST)
2722 return img_filter_cube_array_nearest;
2723 else
2724 return img_filter_cube_array_linear;
2725 break;
2726 case PIPE_TEXTURE_3D:
2727 if (filter == PIPE_TEX_FILTER_NEAREST)
2728 return img_filter_3d_nearest;
2729 else
2730 return img_filter_3d_linear;
2731 break;
2732 default:
2733 assert(0);
2734 return img_filter_1d_nearest;
2735 }
2736 }
2737
2738
2739 /**
2740 * Bind the given texture object and texture cache to the sampler variant.
2741 */
2742 void
2743 sp_sampler_variant_bind_view( struct sp_sampler_variant *samp,
2744 struct softpipe_tex_tile_cache *tex_cache,
2745 const struct pipe_sampler_view *view )
2746 {
2747 const struct pipe_resource *texture = view->texture;
2748
2749 samp->view = view;
2750 samp->cache = tex_cache;
2751 samp->xpot = util_logbase2( texture->width0 );
2752 samp->ypot = util_logbase2( texture->height0 );
2753 }
2754
2755
/**
 * Release a sampler variant.  Only the variant structure itself is freed.
 */
void
sp_sampler_variant_destroy(struct sp_sampler_variant *samp)
{
   FREE(samp);
}
2761
2762
2763 static void
2764 sample_get_dims(struct sp_sampler_variant *samp, int level,
2765 int dims[4])
2766 {
2767 const struct pipe_sampler_view *view = samp->view;
2768 const struct pipe_resource *texture = view->texture;
2769
2770 /* undefined according to EXT_gpu_program */
2771 level += view->u.tex.first_level;
2772 if (level > view->u.tex.last_level)
2773 return;
2774
2775 dims[0] = u_minify(texture->width0, level);
2776
2777 switch(texture->target) {
2778 case PIPE_TEXTURE_1D_ARRAY:
2779 dims[1] = texture->array_size;
2780 /* fallthrough */
2781 case PIPE_TEXTURE_1D:
2782 return;
2783 case PIPE_TEXTURE_2D_ARRAY:
2784 dims[2] = texture->array_size;
2785 /* fallthrough */
2786 case PIPE_TEXTURE_2D:
2787 case PIPE_TEXTURE_CUBE:
2788 case PIPE_TEXTURE_RECT:
2789 dims[1] = u_minify(texture->height0, level);
2790 return;
2791 case PIPE_TEXTURE_3D:
2792 dims[1] = u_minify(texture->height0, level);
2793 dims[2] = u_minify(texture->depth0, level);
2794 return;
2795 case PIPE_TEXTURE_CUBE_ARRAY:
2796 dims[1] = u_minify(texture->height0, level);
2797 dims[2] = texture->array_size / 6;
2798 break;
2799 case PIPE_BUFFER:
2800 dims[0] /= util_format_get_blocksize(view->format);
2801 return;
2802 default:
2803 assert(!"unexpected texture target in sample_get_dims()");
2804 return;
2805 }
2806 }
2807
2808 /**
2809 * This function is only used for getting unfiltered texels via the
2810 * TXF opcode. The GL spec says that out-of-bounds texel fetches
2811 * produce undefined results. Instead of crashing, lets just clamp
2812 * coords to the texture image size.
2813 */
2814 static void
2815 sample_get_texels(struct sp_sampler_variant *samp,
2816 const int v_i[TGSI_QUAD_SIZE],
2817 const int v_j[TGSI_QUAD_SIZE],
2818 const int v_k[TGSI_QUAD_SIZE],
2819 const int lod[TGSI_QUAD_SIZE],
2820 const int8_t offset[3],
2821 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2822 {
2823 union tex_tile_address addr;
2824 const struct pipe_resource *texture = samp->view->texture;
2825 int j, c;
2826 const float *tx;
2827 const bool need_swizzle = any_swizzle(samp->key);
2828 int width, height, depth, layers;
2829
2830 addr.value = 0;
2831 /* TODO write a better test for LOD */
2832 addr.bits.level = lod[0];
2833
2834 width = u_minify(texture->width0, addr.bits.level);
2835 height = u_minify(texture->height0, addr.bits.level);
2836 depth = u_minify(texture->depth0, addr.bits.level);
2837 layers = texture->array_size;
2838
2839 switch(texture->target) {
2840 case PIPE_BUFFER:
2841 case PIPE_TEXTURE_1D:
2842 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2843 int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
2844 tx = get_texel_2d(samp, addr, x, 0);
2845 for (c = 0; c < 4; c++) {
2846 rgba[c][j] = tx[c];
2847 }
2848 }
2849 break;
2850 case PIPE_TEXTURE_1D_ARRAY:
2851 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2852 int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
2853 int y = CLAMP(v_j[j], 0, layers - 1);
2854 tx = get_texel_1d_array(samp, addr, x, y);
2855 for (c = 0; c < 4; c++) {
2856 rgba[c][j] = tx[c];
2857 }
2858 }
2859 break;
2860 case PIPE_TEXTURE_2D:
2861 case PIPE_TEXTURE_RECT:
2862 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2863 int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
2864 int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
2865 tx = get_texel_2d(samp, addr, x, y);
2866 for (c = 0; c < 4; c++) {
2867 rgba[c][j] = tx[c];
2868 }
2869 }
2870 break;
2871 case PIPE_TEXTURE_2D_ARRAY:
2872 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2873 int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
2874 int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
2875 int layer = CLAMP(v_k[j], 0, layers - 1);
2876 tx = get_texel_2d_array(samp, addr, x, y, layer);
2877 for (c = 0; c < 4; c++) {
2878 rgba[c][j] = tx[c];
2879 }
2880 }
2881 break;
2882 case PIPE_TEXTURE_3D:
2883 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2884 int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
2885 int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
2886 int z = CLAMP(v_k[j] + offset[2], 0, depth - 1);
2887
2888 tx = get_texel_3d(samp, addr, x, y, z);
2889 for (c = 0; c < 4; c++) {
2890 rgba[c][j] = tx[c];
2891 }
2892 }
2893 break;
2894 case PIPE_TEXTURE_CUBE: /* TXF can't work on CUBE according to spec */
2895 default:
2896 assert(!"Unknown or CUBE texture type in TXF processing\n");
2897 break;
2898 }
2899
2900 if (need_swizzle) {
2901 float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2902 memcpy(rgba_temp, rgba, sizeof(rgba_temp));
2903 do_swizzling(samp, rgba_temp, rgba);
2904 }
2905 }
2906
2907
2908 /**
2909 * Create a sampler variant for a given set of non-orthogonal state.
2910 */
2911 struct sp_sampler_variant *
2912 sp_create_sampler_variant( const struct pipe_sampler_state *sampler,
2913 const union sp_sampler_key key )
2914 {
2915 struct sp_sampler_variant *samp = CALLOC_STRUCT(sp_sampler_variant);
2916 if (!samp)
2917 return NULL;
2918
2919 samp->sampler = sampler;
2920 samp->key = key;
2921
2922 /* Note that (for instance) linear_texcoord_s and
2923 * nearest_texcoord_s may be active at the same time, if the
2924 * sampler min_img_filter differs from its mag_img_filter.
2925 */
2926 if (sampler->normalized_coords) {
2927 samp->linear_texcoord_s = get_linear_wrap( sampler->wrap_s );
2928 samp->linear_texcoord_t = get_linear_wrap( sampler->wrap_t );
2929 samp->linear_texcoord_p = get_linear_wrap( sampler->wrap_r );
2930
2931 samp->nearest_texcoord_s = get_nearest_wrap( sampler->wrap_s );
2932 samp->nearest_texcoord_t = get_nearest_wrap( sampler->wrap_t );
2933 samp->nearest_texcoord_p = get_nearest_wrap( sampler->wrap_r );
2934 }
2935 else {
2936 samp->linear_texcoord_s = get_linear_unorm_wrap( sampler->wrap_s );
2937 samp->linear_texcoord_t = get_linear_unorm_wrap( sampler->wrap_t );
2938 samp->linear_texcoord_p = get_linear_unorm_wrap( sampler->wrap_r );
2939
2940 samp->nearest_texcoord_s = get_nearest_unorm_wrap( sampler->wrap_s );
2941 samp->nearest_texcoord_t = get_nearest_unorm_wrap( sampler->wrap_t );
2942 samp->nearest_texcoord_p = get_nearest_unorm_wrap( sampler->wrap_r );
2943 }
2944
2945 samp->compute_lambda = get_lambda_func( key );
2946
2947 samp->min_img_filter = get_img_filter(key, sampler->min_img_filter, sampler);
2948 samp->mag_img_filter = get_img_filter(key, sampler->mag_img_filter, sampler);
2949
2950 switch (sampler->min_mip_filter) {
2951 case PIPE_TEX_MIPFILTER_NONE:
2952 if (sampler->min_img_filter == sampler->mag_img_filter)
2953 samp->mip_filter = mip_filter_none_no_filter_select;
2954 else
2955 samp->mip_filter = mip_filter_none;
2956 break;
2957
2958 case PIPE_TEX_MIPFILTER_NEAREST:
2959 samp->mip_filter = mip_filter_nearest;
2960 break;
2961
2962 case PIPE_TEX_MIPFILTER_LINEAR:
2963 if (key.bits.is_pot &&
2964 key.bits.target == PIPE_TEXTURE_2D &&
2965 sampler->min_img_filter == sampler->mag_img_filter &&
2966 sampler->normalized_coords &&
2967 sampler->wrap_s == PIPE_TEX_WRAP_REPEAT &&
2968 sampler->wrap_t == PIPE_TEX_WRAP_REPEAT &&
2969 sampler->min_img_filter == PIPE_TEX_FILTER_LINEAR) {
2970 samp->mip_filter = mip_filter_linear_2d_linear_repeat_POT;
2971 }
2972 else {
2973 samp->mip_filter = mip_filter_linear;
2974 }
2975
2976 /* Anisotropic filtering extension. */
2977 if (sampler->max_anisotropy > 1) {
2978 samp->mip_filter = mip_filter_linear_aniso;
2979
2980 /* Override min_img_filter:
2981 * min_img_filter needs to be set to NEAREST since we need to access
2982 * each texture pixel as it is and weight it later; using linear
2983 * filters will have incorrect results.
2984 * By setting the filter to NEAREST here, we can avoid calling the
2985 * generic img_filter_2d_nearest in the anisotropic filter function,
2986 * making it possible to use one of the accelerated implementations
2987 */
2988 samp->min_img_filter = get_img_filter(key, PIPE_TEX_FILTER_NEAREST, sampler);
2989
2990 /* on first access create the lookup table containing the filter weights. */
2991 if (!weightLut) {
2992 create_filter_table();
2993 }
2994 }
2995
2996 break;
2997 }
2998
2999 if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
3000 samp->compare = sample_compare;
3001 }
3002 else {
3003 /* Skip compare operation by promoting the mip_filter function
3004 * pointer:
3005 */
3006 samp->compare = samp->mip_filter;
3007 }
3008
3009 if (key.bits.target == PIPE_TEXTURE_CUBE || key.bits.target == PIPE_TEXTURE_CUBE_ARRAY) {
3010 samp->sample_target = sample_cube;
3011 }
3012 else {
3013 samp->faces[0] = 0;
3014 samp->faces[1] = 0;
3015 samp->faces[2] = 0;
3016 samp->faces[3] = 0;
3017
3018 /* Skip cube face determination by promoting the compare
3019 * function pointer:
3020 */
3021 samp->sample_target = samp->compare;
3022 }
3023
3024 if (any_swizzle(key)) {
3025 samp->get_samples = sample_swizzle;
3026 }
3027 else {
3028 samp->get_samples = samp->sample_target;
3029 }
3030
3031 samp->get_dims = sample_get_dims;
3032 samp->get_texel = sample_get_texels;
3033 return samp;
3034 }
3035
3036
3037
3038 static void
3039 sp_tgsi_get_dims(struct tgsi_sampler *tgsi_sampler,
3040 const unsigned sview_index,
3041 int level, int dims[4])
3042 {
3043 const struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
3044
3045 assert(sp_samp->sp_sampler[sview_index]);
3046 sp_samp->sp_sampler[sview_index]->get_dims(sp_samp->sp_sampler[sview_index],
3047 level, dims);
3048 }
3049
3050
3051 static void
3052 sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler,
3053 const unsigned sview_index,
3054 const unsigned sampler_index,
3055 const float s[TGSI_QUAD_SIZE],
3056 const float t[TGSI_QUAD_SIZE],
3057 const float p[TGSI_QUAD_SIZE],
3058 const float c0[TGSI_QUAD_SIZE],
3059 const float lod[TGSI_QUAD_SIZE],
3060 float derivs[3][2][TGSI_QUAD_SIZE],
3061 const int8_t offset[3],
3062 enum tgsi_sampler_control control,
3063 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
3064 {
3065 const struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
3066 assert(sview_index < PIPE_MAX_SAMPLERS);
3067 assert(sview_index == sampler_index);
3068 assert(sp_samp->sp_sampler[sampler_index]);
3069 sp_samp->sp_sampler[sview_index]->get_samples(sp_samp->sp_sampler[sampler_index],
3070 s, t, p, c0, lod, control, rgba);
3071 }
3072
3073
3074 static void
3075 sp_tgsi_get_texel(struct tgsi_sampler *tgsi_sampler,
3076 const unsigned sview_index,
3077 const int i[TGSI_QUAD_SIZE],
3078 const int j[TGSI_QUAD_SIZE], const int k[TGSI_QUAD_SIZE],
3079 const int lod[TGSI_QUAD_SIZE], const int8_t offset[3],
3080 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
3081 {
3082 const struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
3083 assert(sview_index < PIPE_MAX_SAMPLERS);
3084 assert(sp_samp->sp_sampler[sview_index]);
3085 sp_samp->sp_sampler[sview_index]->get_texel(sp_samp->sp_sampler[sview_index],
3086 i, j, k, lod, offset, rgba);
3087 }
3088
3089
3090 struct sp_tgsi_sampler *
3091 sp_create_tgsi_sampler(void)
3092 {
3093 struct sp_tgsi_sampler *samp = CALLOC_STRUCT(sp_tgsi_sampler);
3094 if (!samp)
3095 return NULL;
3096
3097 samp->base.get_dims = sp_tgsi_get_dims;
3098 samp->base.get_samples = sp_tgsi_get_samples;
3099 samp->base.get_texel = sp_tgsi_get_texel;
3100
3101 return samp;
3102 }
3103