softpipe: start adding gather support (v2)
[mesa.git] / src / gallium / drivers / softpipe / sp_tex_sample.c
1 /**************************************************************************
2 *
3 * Copyright 2007 VMware, Inc.
4 * All Rights Reserved.
5 * Copyright 2008-2010 VMware, Inc. All rights reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * Texture sampling
31 *
32 * Authors:
33 * Brian Paul
34 * Keith Whitwell
35 */
36
37 #include "pipe/p_context.h"
38 #include "pipe/p_defines.h"
39 #include "pipe/p_shader_tokens.h"
40 #include "util/u_math.h"
41 #include "util/u_format.h"
42 #include "util/u_memory.h"
43 #include "util/u_inlines.h"
44 #include "sp_quad.h" /* only for #define QUAD_* tokens */
45 #include "sp_tex_sample.h"
46 #include "sp_texture.h"
47 #include "sp_tex_tile_cache.h"
48
49
50 /** Set to one to help debug texture sampling */
51 #define DEBUG_TEX 0
52
53
54 /*
55 * Return fractional part of 'f'. Used for computing interpolation weights.
56 * Need to be careful with negative values.
57 * Note, if this function isn't perfect you'll sometimes see 1-pixel bands
58 * of improperly weighted linear-filtered textures.
59 * The tests/texwrap.c demo is a good test.
60 */
61 static INLINE float
62 frac(float f)
63 {
64 return f - floorf(f);
65 }
66
67
68
/**
 * Linear interpolation between v0 and v1.
 * \param a   the interpolant; 0 selects v0, 1 selects v1 (up to rounding)
 * \param v0  value at a = 0
 * \param v1  value at a = 1
 */
static INLINE float
lerp(float a, float v0, float v1)
{
   const float span = v1 - v0;

   return v0 + a * span;
}
77
78
79 /**
80 * Do 2D/bilinear interpolation of float values.
81 * v00, v10, v01 and v11 are typically four texture samples in a square/box.
82 * a and b are the horizontal and vertical interpolants.
83 * It's important that this function is inlined when compiled with
84 * optimization! If we find that's not true on some systems, convert
85 * to a macro.
86 */
87 static INLINE float
88 lerp_2d(float a, float b,
89 float v00, float v10, float v01, float v11)
90 {
91 const float temp0 = lerp(a, v00, v10);
92 const float temp1 = lerp(a, v01, v11);
93 return lerp(b, temp0, temp1);
94 }
95
96
97 /**
98 * As above, but 3D interpolation of 8 values.
99 */
100 static INLINE float
101 lerp_3d(float a, float b, float c,
102 float v000, float v100, float v010, float v110,
103 float v001, float v101, float v011, float v111)
104 {
105 const float temp0 = lerp_2d(a, b, v000, v100, v010, v110);
106 const float temp1 = lerp_2d(a, b, v001, v101, v011, v111);
107 return lerp(c, temp0, temp1);
108 }
109
110
111
/**
 * Compute coord modulo size for repeat wrap modes, always returning a
 * value in [0, size-1].
 *
 * C's % operator yields a non-positive remainder for a negative dividend,
 * so a negative remainder is shifted up by one period.  This is correct
 * for every negative coord; biasing the coord by a fixed multiple of the
 * size (the previous approach) broke down for coords below -1024 * size
 * whenever size was not a power of two.
 *
 * \param coord  the (possibly negative) integer texel coordinate
 * \param size   the texture image size, must be non-zero
 * \return coord wrapped into [0, size-1]
 */
static INLINE int
repeat(int coord, unsigned size)
{
   const int period = (int) size;
   const int rem = coord % period;

   return rem < 0 ? rem + period : rem;
}
123
124
/**
 * Nearest-sampling wrap function, repeat mode: convert one texcoord
 * (S or T or P) into an integer texel index.
 * \param s       the incoming texcoord (scaled by size here)
 * \param size    the texture image size
 * \param offset  integer texel offset, added before wrapping
 * \param icoord  returns the integer texel index, in [0, size-1]
 */
static void
wrap_nearest_repeat(float s, unsigned size, int offset, int *icoord)
{
   /* texel space index, shifted by the offset, then wrapped */
   const int texel = util_ifloor(s * size) + offset;

   *icoord = repeat(texel, size);
}
141
142
/**
 * Nearest-sampling wrap function, clamp mode.
 * s is limited to [0,1] and the resulting index to [0, size-1]; a non-zero
 * offset is added afterwards and the sum clamped to the same index range.
 * \param s       the incoming texcoord
 * \param size    the texture image size
 * \param offset  integer texel offset
 * \param icoord  returns the integer texel index
 */
static void
wrap_nearest_clamp(float s, unsigned size, int offset, int *icoord)
{
   int texel;

   if (s <= 0.0F) {
      texel = 0;
   }
   else if (s >= 1.0F) {
      texel = size - 1;
   }
   else {
      texel = util_ifloor(s * size);
   }

   if (offset) {
      texel = CLAMP(texel + offset, 0, size - 1);
   }

   *icoord = texel;
}
157
158
/**
 * Nearest-sampling wrap function, clamp-to-edge mode.
 * s is limited to [half texel, 1 - half texel] and the resulting index to
 * [0, size-1]; a non-zero offset is added afterwards and the sum clamped
 * to the same index range.
 * \param s       the incoming texcoord
 * \param size    the texture image size
 * \param offset  integer texel offset
 * \param icoord  returns the integer texel index
 */
static void
wrap_nearest_clamp_to_edge(float s, unsigned size, int offset, int *icoord)
{
   const float half_texel = 1.0F / (2.0F * size);
   const float upper = 1.0F - half_texel;
   int texel;

   if (s < half_texel) {
      texel = 0;
   }
   else if (s > upper) {
      texel = size - 1;
   }
   else {
      texel = util_ifloor(s * size);
   }

   if (offset) {
      texel = CLAMP(texel + offset, 0, size - 1);
   }

   *icoord = texel;
}
176
177
/**
 * Nearest-sampling wrap function, clamp-to-border mode.
 * s is limited to [-half texel, 1 + half texel] and the resulting index to
 * [-1, size]; -1 and size stand for "off the low/high end" of the image.
 *
 * A non-zero offset is added afterwards and the sum is clamped to the same
 * [-1, size] range, so that an offset texel can still fall onto the border.
 * (Clamping to [0, size-1] here would make the border unreachable as soon
 * as an offset is used.)
 * \param s       the incoming texcoord
 * \param size    the texture image size
 * \param offset  integer texel offset
 * \param icoord  returns the integer texel index, in [-1, size]
 */
static void
wrap_nearest_clamp_to_border(float s, unsigned size, int offset, int *icoord)
{
   /* s limited to [min,max] */
   /* i limited to [-1, size] */
   const float min = -1.0F / (2.0F * size);
   const float max = 1.0F - min;
   if (s <= min)
      *icoord = -1;
   else if (s >= max)
      *icoord = size;
   else
      *icoord = util_ifloor(s * size);
   if (offset) {
      /* cast keeps the comparison signed so that -1 survives the clamp */
      *icoord = CLAMP(*icoord + offset, -1, (int) size);
   }
}
194
195
/**
 * Nearest-sampling wrap function, mirrored-repeat mode.
 * The fractional part of s is used directly on even repetitions and
 * flipped (1 - frac) on odd ones, then limited to
 * [half texel, 1 - half texel].  A non-zero offset is added afterwards and
 * the sum clamped to [0, size-1].
 * \param s       the incoming texcoord
 * \param size    the texture image size
 * \param offset  integer texel offset
 * \param icoord  returns the integer texel index
 */
static void
wrap_nearest_mirror_repeat(float s, unsigned size, int offset, int *icoord)
{
   const float half_texel = 1.0F / (2.0F * size);
   const float upper = 1.0F - half_texel;
   const int odd_repetition = util_ifloor(s) & 1;
   float u = frac(s);
   int texel;

   if (odd_repetition) {
      u = 1.0F - u;
   }

   if (u < half_texel) {
      texel = 0;
   }
   else if (u > upper) {
      texel = size - 1;
   }
   else {
      texel = util_ifloor(u * size);
   }

   if (offset) {
      texel = CLAMP(texel + offset, 0, size - 1);
   }

   *icoord = texel;
}
214
215
/**
 * Nearest-sampling wrap function, mirror-clamp mode.
 * |s| is limited to [0,1] and the resulting index to [0, size-1]; a
 * non-zero offset is added afterwards and the sum clamped to the same
 * index range.
 * \param s       the incoming texcoord
 * \param size    the texture image size
 * \param offset  integer texel offset
 * \param icoord  returns the integer texel index
 */
static void
wrap_nearest_mirror_clamp(float s, unsigned size, int offset, int *icoord)
{
   const float u = fabsf(s);
   int texel;

   if (u <= 0.0F) {
      texel = 0;
   }
   else if (u >= 1.0F) {
      texel = size - 1;
   }
   else {
      texel = util_ifloor(u * size);
   }

   if (offset) {
      texel = CLAMP(texel + offset, 0, size - 1);
   }

   *icoord = texel;
}
231
232
/**
 * Nearest-sampling wrap function, mirror-clamp-to-edge mode.
 * |s| is limited to [half texel, 1 - half texel] and the resulting index
 * to [0, size-1]; a non-zero offset is added afterwards and the sum
 * clamped to the same index range.
 * \param s       the incoming texcoord
 * \param size    the texture image size
 * \param offset  integer texel offset
 * \param icoord  returns the integer texel index
 */
static void
wrap_nearest_mirror_clamp_to_edge(float s, unsigned size, int offset, int *icoord)
{
   const float half_texel = 1.0F / (2.0F * size);
   const float upper = 1.0F - half_texel;
   const float u = fabsf(s);
   int texel;

   if (u < half_texel) {
      texel = 0;
   }
   else if (u > upper) {
      texel = size - 1;
   }
   else {
      texel = util_ifloor(u * size);
   }

   if (offset) {
      texel = CLAMP(texel + offset, 0, size - 1);
   }

   *icoord = texel;
}
250
251
/**
 * Nearest-sampling wrap function, mirror-clamp-to-border mode.
 * |s| is limited to [-half texel, 1 + half texel] and the resulting index
 * to [-1, size]; -1 and size stand for "off the low/high end" of the image.
 *
 * A non-zero offset is added afterwards and the sum is clamped to the same
 * [-1, size] range, so that an offset texel can still fall onto the border.
 * (Clamping to [0, size-1] here would make the border unreachable as soon
 * as an offset is used.)
 * \param s       the incoming texcoord
 * \param size    the texture image size
 * \param offset  integer texel offset
 * \param icoord  returns the integer texel index, in [-1, size]
 */
static void
wrap_nearest_mirror_clamp_to_border(float s, unsigned size, int offset, int *icoord)
{
   /* s limited to [min,max] */
   /* i limited to [-1, size] */
   const float min = -1.0F / (2.0F * size);
   const float max = 1.0F - min;
   const float u = fabsf(s);
   /* NOTE: min is negative and u is an absolute value, so the first
    * branch is not expected to be taken; kept to mirror the non-mirrored
    * clamp-to-border function.
    */
   if (u < min)
      *icoord = -1;
   else if (u > max)
      *icoord = size;
   else
      *icoord = util_ifloor(u * size);
   if (offset) {
      /* cast keeps the comparison signed so that -1 survives the clamp */
      *icoord = CLAMP(*icoord + offset, -1, (int) size);
   }
}
269
270
/**
 * Linear-sampling wrap function, repeat mode: compute the two texel
 * indices to blend and the blend weight for one texcoord.
 * \param s        the texcoord
 * \param size     the texture image size
 * \param offset   integer texel offset, added before wrapping
 * \param icoord0  returns first texture index
 * \param icoord1  returns second texture index (icoord0 + 1, wrapped)
 * \param w        returns blend factor/weight between texture indices
 */
static void
wrap_linear_repeat(float s, unsigned size, int offset,
                   int *icoord0, int *icoord1, float *w)
{
   /* shift by half a texel so the weight is measured from texel centres */
   const float u = s * size - 0.5F;
   const int first = repeat(util_ifloor(u) + offset, size);

   *icoord0 = first;
   *icoord1 = repeat(first + 1, size);
   *w = frac(u);
}
290
291
/**
 * Linear-sampling wrap function, clamp mode.
 * s is clamped to [0,1] before being scaled to texel space.  Without an
 * offset the indices are returned unclamped (icoord0 may be -1, icoord1
 * may be size); with a non-zero offset both are clamped to [0, size-1]
 * after the offset is added.
 * \param s        the texcoord
 * \param size     the texture image size
 * \param offset   integer texel offset
 * \param icoord0  returns first texture index
 * \param icoord1  returns second texture index
 * \param w        returns blend factor/weight between texture indices
 */
static void
wrap_linear_clamp(float s, unsigned size, int offset,
                  int *icoord0, int *icoord1, float *w)
{
   float u = CLAMP(s, 0.0F, 1.0F);
   int first, second;

   u = u * size - 0.5f;
   first = util_ifloor(u);
   second = first + 1;

   if (offset) {
      first = CLAMP(first + offset, 0, size - 1);
      second = CLAMP(second + offset, 0, size - 1);
   }

   *icoord0 = first;
   *icoord1 = second;
   *w = frac(u);
}
306
307
/**
 * Linear-sampling wrap function, clamp-to-edge mode.
 * s is clamped to [0,1], scaled to texel space, and both indices are kept
 * inside [0, size-1].  A non-zero offset is added afterwards and the sums
 * clamped to the same range.
 * \param s        the texcoord
 * \param size     the texture image size
 * \param offset   integer texel offset
 * \param icoord0  returns first texture index
 * \param icoord1  returns second texture index
 * \param w        returns blend factor/weight between texture indices
 */
static void
wrap_linear_clamp_to_edge(float s, unsigned size, int offset,
                          int *icoord0, int *icoord1, float *w)
{
   float u = CLAMP(s, 0.0F, 1.0F);
   int first, second;

   u = u * size - 0.5f;
   first = util_ifloor(u);
   second = first + 1;

   /* keep both taps on the image */
   if (first < 0) {
      first = 0;
   }
   if (second >= (int) size) {
      second = size - 1;
   }

   if (offset) {
      first = CLAMP(first + offset, 0, size - 1);
      second = CLAMP(second + offset, 0, size - 1);
   }

   *icoord0 = first;
   *icoord1 = second;
   *w = frac(u);
}
326
327
/**
 * Linear-sampling wrap function, clamp-to-border mode.
 * s is clamped to [-half texel, 1 + half texel] before being scaled to
 * texel space, so the returned indices may lie just outside the image
 * (those taps are expected to resolve to the border colour).
 *
 * A non-zero offset is applied to both indices (it used to be silently
 * ignored).  The sums are clamped to [-1, size] - the same range the
 * nearest clamp-to-border path uses - so offset taps can still reach the
 * border but never run arbitrarily far outside the image.
 * \param s        the texcoord
 * \param size     the texture image size
 * \param offset   integer texel offset
 * \param icoord0  returns first texture index
 * \param icoord1  returns second texture index
 * \param w        returns blend factor/weight between texture indices
 */
static void
wrap_linear_clamp_to_border(float s, unsigned size, int offset,
                            int *icoord0, int *icoord1, float *w)
{
   const float min = -1.0F / (2.0F * size);
   const float max = 1.0F - min;
   float u = CLAMP(s, min, max);
   u = u * size - 0.5f;
   *icoord0 = util_ifloor(u);
   *icoord1 = *icoord0 + 1;
   if (offset) {
      /* casts keep the comparisons signed so that -1 survives the clamp */
      *icoord0 = CLAMP(*icoord0 + offset, -1, (int) size);
      *icoord1 = CLAMP(*icoord1 + offset, -1, (int) size);
   }
   *w = frac(u);
}
340
341
/**
 * Linear-sampling wrap function, mirrored-repeat mode.
 * The fractional part of s is used directly on even repetitions and
 * flipped (1 - frac) on odd ones; both indices are kept inside
 * [0, size-1].
 *
 * A non-zero offset is applied to both indices (it used to be silently
 * ignored) in the same way wrap_nearest_mirror_repeat() does it: added
 * after mirroring, with the sum clamped to [0, size-1].
 * \param s        the texcoord
 * \param size     the texture image size
 * \param offset   integer texel offset
 * \param icoord0  returns first texture index
 * \param icoord1  returns second texture index
 * \param w        returns blend factor/weight between texture indices
 */
static void
wrap_linear_mirror_repeat(float s, unsigned size, int offset,
                          int *icoord0, int *icoord1, float *w)
{
   const int flr = util_ifloor(s);
   float u = frac(s);
   if (flr & 1)
      u = 1.0F - u;
   u = u * size - 0.5F;
   *icoord0 = util_ifloor(u);
   *icoord1 = *icoord0 + 1;
   if (*icoord0 < 0)
      *icoord0 = 0;
   if (*icoord1 >= (int) size)
      *icoord1 = size - 1;
   if (offset) {
      *icoord0 = CLAMP(*icoord0 + offset, 0, (int) size - 1);
      *icoord1 = CLAMP(*icoord1 + offset, 0, (int) size - 1);
   }
   *w = frac(u);
}
359
360
/**
 * Linear-sampling wrap function, mirror-clamp mode.
 * |s| is limited to at most 1 and scaled to texel space.  Without an
 * offset the indices are returned unclamped (icoord0 may be -1, icoord1
 * may be size).
 *
 * A non-zero offset is applied to both indices (it used to be silently
 * ignored) in the same way wrap_linear_clamp() does it: added after
 * wrapping, with the sum clamped to [0, size-1].
 * \param s        the texcoord
 * \param size     the texture image size
 * \param offset   integer texel offset
 * \param icoord0  returns first texture index
 * \param icoord1  returns second texture index
 * \param w        returns blend factor/weight between texture indices
 */
static void
wrap_linear_mirror_clamp(float s, unsigned size, int offset,
                         int *icoord0, int *icoord1, float *w)
{
   float u = fabsf(s);
   if (u >= 1.0F)
      u = (float) size;
   else
      u *= size;
   u -= 0.5F;
   *icoord0 = util_ifloor(u);
   *icoord1 = *icoord0 + 1;
   if (offset) {
      *icoord0 = CLAMP(*icoord0 + offset, 0, (int) size - 1);
      *icoord1 = CLAMP(*icoord1 + offset, 0, (int) size - 1);
   }
   *w = frac(u);
}
375
376
/**
 * Linear-sampling wrap function, mirror-clamp-to-edge mode.
 * |s| is limited to at most 1, scaled to texel space, and both indices
 * are kept inside [0, size-1].
 *
 * A non-zero offset is applied to both indices (it used to be silently
 * ignored) in the same way wrap_linear_clamp_to_edge() does it: added
 * after wrapping, with the sum clamped to [0, size-1].
 * \param s        the texcoord
 * \param size     the texture image size
 * \param offset   integer texel offset
 * \param icoord0  returns first texture index
 * \param icoord1  returns second texture index
 * \param w        returns blend factor/weight between texture indices
 */
static void
wrap_linear_mirror_clamp_to_edge(float s, unsigned size, int offset,
                                 int *icoord0, int *icoord1, float *w)
{
   float u = fabsf(s);
   if (u >= 1.0F)
      u = (float) size;
   else
      u *= size;
   u -= 0.5F;
   *icoord0 = util_ifloor(u);
   *icoord1 = *icoord0 + 1;
   if (*icoord0 < 0)
      *icoord0 = 0;
   if (*icoord1 >= (int) size)
      *icoord1 = size - 1;
   if (offset) {
      *icoord0 = CLAMP(*icoord0 + offset, 0, (int) size - 1);
      *icoord1 = CLAMP(*icoord1 + offset, 0, (int) size - 1);
   }
   *w = frac(u);
}
395
396
/**
 * Linear-sampling wrap function, mirror-clamp-to-border mode.
 * |s| is limited to [-half texel, 1 + half texel] and scaled to texel
 * space, so the returned indices may lie just outside the image (those
 * taps are expected to resolve to the border colour).
 *
 * A non-zero offset is applied to both indices (it used to be silently
 * ignored).  The sums are clamped to [-1, size] - the same range the
 * nearest clamp-to-border path uses - so offset taps can still reach the
 * border but never run arbitrarily far outside the image.
 * \param s        the texcoord
 * \param size     the texture image size
 * \param offset   integer texel offset
 * \param icoord0  returns first texture index
 * \param icoord1  returns second texture index
 * \param w        returns blend factor/weight between texture indices
 */
static void
wrap_linear_mirror_clamp_to_border(float s, unsigned size, int offset,
                                   int *icoord0, int *icoord1, float *w)
{
   const float min = -1.0F / (2.0F * size);
   const float max = 1.0F - min;
   float u = fabsf(s);
   if (u <= min)
      u = min * size;
   else if (u >= max)
      u = max * size;
   else
      u *= size;
   u -= 0.5F;
   *icoord0 = util_ifloor(u);
   *icoord1 = *icoord0 + 1;
   if (offset) {
      /* casts keep the comparisons signed so that -1 survives the clamp */
      *icoord0 = CLAMP(*icoord0 + offset, -1, (int) size);
      *icoord1 = CLAMP(*icoord1 + offset, -1, (int) size);
   }
   *w = frac(u);
}
415
416
/**
 * PIPE_TEX_WRAP_CLAMP for nearest sampling, unnormalized coords.
 * The coordinate is floored, the offset added, and the result clamped to
 * [0, size-1].
 */
static void
wrap_nearest_unorm_clamp(float s, unsigned size, int offset, int *icoord)
{
   const int texel = util_ifloor(s) + offset;

   *icoord = CLAMP(texel, 0, (int) size-1);
}
426
427
/**
 * PIPE_TEX_WRAP_CLAMP_TO_BORDER for nearest sampling, unnormalized coords.
 * The offset coordinate is clamped to [-0.5, size + 0.5] before flooring.
 */
static void
wrap_nearest_unorm_clamp_to_border(float s, unsigned size, int offset, int *icoord)
{
   const float lower = -0.5F;
   const float upper = (float) size + 0.5F;
   const float u = s + offset;

   *icoord = util_ifloor( CLAMP(u, lower, upper) );
}
436
437
/**
 * PIPE_TEX_WRAP_CLAMP_TO_EDGE for nearest sampling, unnormalized coords.
 * The offset coordinate is clamped to [0.5, size - 0.5] before flooring.
 */
static void
wrap_nearest_unorm_clamp_to_edge(float s, unsigned size, int offset, int *icoord)
{
   const float lower = 0.5F;
   const float upper = (float) size - 0.5F;
   const float u = s + offset;

   *icoord = util_ifloor( CLAMP(u, lower, upper) );
}
446
447
/**
 * PIPE_TEX_WRAP_CLAMP for linear sampling, unnormalized coords.
 * Returns the two texel indices to blend and the weight between them.
 */
static void
wrap_linear_unorm_clamp(float s, unsigned size, int offset,
                        int *icoord0, int *icoord1, float *w)
{
   /* Not exactly what the spec says, but it matches NVIDIA output */
   const float upper = (float) size - 1.0f;
   const float shifted = s + offset - 0.5F;
   const float u = CLAMP(shifted, 0.0f, upper);
   const int first = util_ifloor(u);

   *icoord0 = first;
   *icoord1 = first + 1;
   *w = frac(u);
}
461
462
/**
 * PIPE_TEX_WRAP_CLAMP_TO_BORDER for linear sampling, unnormalized coords.
 * The offset coordinate is clamped to [-0.5, size + 0.5]; the second index
 * is additionally limited to at most size - 1.
 */
static void
wrap_linear_unorm_clamp_to_border(float s, unsigned size, int offset,
                                  int *icoord0, int *icoord1, float *w)
{
   const float lower = -0.5F;
   const float upper = (float) size + 0.5F;
   const float shifted = s + offset;
   float u = CLAMP(shifted, lower, upper);
   int first, second;

   u -= 0.5F;
   first = util_ifloor(u);
   second = first + 1;
   if (second > (int) size - 1) {
      second = size - 1;
   }

   *icoord0 = first;
   *icoord1 = second;
   *w = frac(u);
}
478
479
/**
 * PIPE_TEX_WRAP_CLAMP_TO_EDGE for linear sampling, unnormalized coords.
 * The offset coordinate is clamped to [0.5, size - 0.5]; the second index
 * is additionally limited to at most size - 1.
 */
static void
wrap_linear_unorm_clamp_to_edge(float s, unsigned size, int offset,
                                int *icoord0, int *icoord1, float *w)
{
   const float lower = +0.5F;
   const float upper = (float) size - 0.5F;
   const float shifted = s + offset;
   float u = CLAMP(shifted, lower, upper);
   int first, second;

   u -= 0.5F;
   first = util_ifloor(u);
   second = first + 1;
   if (second > (int) size - 1) {
      second = size - 1;
   }

   *icoord0 = first;
   *icoord1 = second;
   *w = frac(u);
}
495
496
497 /**
498 * Do coordinate to array index conversion. For array textures.
499 */
500 static INLINE int
501 coord_to_layer(float coord, unsigned first_layer, unsigned last_layer)
502 {
503 int c = util_ifloor(coord + 0.5F);
504 return CLAMP(c, (int)first_layer, (int)last_layer);
505 }
506
507
508 /**
509 * Examine the quad's texture coordinates to compute the partial
510 * derivatives w.r.t X and Y, then compute lambda (level of detail).
511 */
512 static float
513 compute_lambda_1d(const struct sp_sampler_view *sview,
514 const float s[TGSI_QUAD_SIZE],
515 const float t[TGSI_QUAD_SIZE],
516 const float p[TGSI_QUAD_SIZE])
517 {
518 const struct pipe_resource *texture = sview->base.texture;
519 float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
520 float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]);
521 float rho = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
522
523 return util_fast_log2(rho);
524 }
525
526
527 static float
528 compute_lambda_2d(const struct sp_sampler_view *sview,
529 const float s[TGSI_QUAD_SIZE],
530 const float t[TGSI_QUAD_SIZE],
531 const float p[TGSI_QUAD_SIZE])
532 {
533 const struct pipe_resource *texture = sview->base.texture;
534 float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
535 float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]);
536 float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
537 float dtdy = fabsf(t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]);
538 float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
539 float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, sview->base.u.tex.first_level);
540 float rho = MAX2(maxx, maxy);
541
542 return util_fast_log2(rho);
543 }
544
545
546 static float
547 compute_lambda_3d(const struct sp_sampler_view *sview,
548 const float s[TGSI_QUAD_SIZE],
549 const float t[TGSI_QUAD_SIZE],
550 const float p[TGSI_QUAD_SIZE])
551 {
552 const struct pipe_resource *texture = sview->base.texture;
553 float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
554 float dsdy = fabsf(s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]);
555 float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
556 float dtdy = fabsf(t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]);
557 float dpdx = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]);
558 float dpdy = fabsf(p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT]);
559 float maxx = MAX2(dsdx, dsdy) * u_minify(texture->width0, sview->base.u.tex.first_level);
560 float maxy = MAX2(dtdx, dtdy) * u_minify(texture->height0, sview->base.u.tex.first_level);
561 float maxz = MAX2(dpdx, dpdy) * u_minify(texture->depth0, sview->base.u.tex.first_level);
562 float rho;
563
564 rho = MAX2(maxx, maxy);
565 rho = MAX2(rho, maxz);
566
567 return util_fast_log2(rho);
568 }
569
570
571 /**
572 * Compute lambda for a vertex texture sampler.
573 * Since there aren't derivatives to use, just return 0.
574 */
575 static float
576 compute_lambda_vert(const struct sp_sampler_view *sview,
577 const float s[TGSI_QUAD_SIZE],
578 const float t[TGSI_QUAD_SIZE],
579 const float p[TGSI_QUAD_SIZE])
580 {
581 return 0.0f;
582 }
583
584
585
586 /**
587 * Get a texel from a texture, using the texture tile cache.
588 *
589 * \param addr the template tex address containing cube, z, face info.
590 * \param x the x coord of texel within 2D image
591 * \param y the y coord of texel within 2D image
592 * \param rgba the quad to put the texel/color into
593 *
594 * XXX maybe move this into sp_tex_tile_cache.c and merge with the
595 * sp_get_cached_tile_tex() function.
596 */
597
598
599
600
601 static INLINE const float *
602 get_texel_2d_no_border(const struct sp_sampler_view *sp_sview,
603 union tex_tile_address addr, int x, int y)
604 {
605 const struct softpipe_tex_cached_tile *tile;
606 addr.bits.x = x / TEX_TILE_SIZE;
607 addr.bits.y = y / TEX_TILE_SIZE;
608 y %= TEX_TILE_SIZE;
609 x %= TEX_TILE_SIZE;
610
611 tile = sp_get_cached_tile_tex(sp_sview->cache, addr);
612
613 return &tile->data.color[y][x][0];
614 }
615
616
617 static INLINE const float *
618 get_texel_2d(const struct sp_sampler_view *sp_sview,
619 const struct sp_sampler *sp_samp,
620 union tex_tile_address addr, int x, int y)
621 {
622 const struct pipe_resource *texture = sp_sview->base.texture;
623 unsigned level = addr.bits.level;
624
625 if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
626 y < 0 || y >= (int) u_minify(texture->height0, level)) {
627 return sp_samp->base.border_color.f;
628 }
629 else {
630 return get_texel_2d_no_border( sp_sview, addr, x, y );
631 }
632 }
633
634
/*
 * Here's the complete logic (HOLY CRAP) for finding the next face and doing
 * the corresponding coord wrapping, implemented by get_next_face,
 * get_next_xcoord and get_next_ycoord.
 * Read each line like this (taking the first line as the example):
 * if the face is +x and the s coord is below zero, then
 * the new face is +z, the new s is max, and the new t is the old t
 * (max is always cube size - 1).
 *
 * +x s- -> +z: s = max,   t = t
 * +x s+ -> -z: s = 0,     t = t
 * +x t- -> +y: s = max,   t = max-s
 * +x t+ -> -y: s = max,   t = s
 *
 * -x s- -> -z: s = max,   t = t
 * -x s+ -> +z: s = 0,     t = t
 * -x t- -> +y: s = 0,     t = s
 * -x t+ -> -y: s = 0,     t = max-s
 *
 * +y s- -> -x: s = t,     t = 0
 * +y s+ -> +x: s = max-t, t = 0
 * +y t- -> -z: s = max-s, t = 0
 * +y t+ -> +z: s = s,     t = 0
 *
 * -y s- -> -x: s = max-t, t = max
 * -y s+ -> +x: s = t,     t = max
 * -y t- -> +z: s = s,     t = max
 * -y t+ -> -z: s = max-s, t = max
 *
 * +z s- -> -x: s = max,   t = t
 * +z s+ -> +x: s = 0,     t = t
 * +z t- -> +y: s = s,     t = max
 * +z t+ -> -y: s = s,     t = 0
 *
 * -z s- -> +x: s = max,   t = t
 * -z s+ -> -x: s = 0,     t = t
 * -z t- -> +y: s = max-s, t = 0
 * -z t+ -> -y: s = max-s, t = max
 */
674
675
676 /*
677 * seamless cubemap neighbour array.
678 * this array is used to find the adjacent face in each of 4 directions,
679 * left, right, up, down. (or -x, +x, -y, +y).
680 */
681 static const unsigned face_array[PIPE_TEX_FACE_MAX][4] = {
682 /* pos X first then neg X is Z different, Y the same */
683 /* PIPE_TEX_FACE_POS_X,*/
684 { PIPE_TEX_FACE_POS_Z, PIPE_TEX_FACE_NEG_Z,
685 PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y },
686 /* PIPE_TEX_FACE_NEG_X */
687 { PIPE_TEX_FACE_NEG_Z, PIPE_TEX_FACE_POS_Z,
688 PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y },
689
690 /* pos Y first then neg Y is X different, X the same */
691 /* PIPE_TEX_FACE_POS_Y */
692 { PIPE_TEX_FACE_NEG_X, PIPE_TEX_FACE_POS_X,
693 PIPE_TEX_FACE_NEG_Z, PIPE_TEX_FACE_POS_Z },
694
695 /* PIPE_TEX_FACE_NEG_Y */
696 { PIPE_TEX_FACE_NEG_X, PIPE_TEX_FACE_POS_X,
697 PIPE_TEX_FACE_POS_Z, PIPE_TEX_FACE_NEG_Z },
698
699 /* pos Z first then neg Y is X different, X the same */
700 /* PIPE_TEX_FACE_POS_Z */
701 { PIPE_TEX_FACE_NEG_X, PIPE_TEX_FACE_POS_X,
702 PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y },
703
704 /* PIPE_TEX_FACE_NEG_Z */
705 { PIPE_TEX_FACE_POS_X, PIPE_TEX_FACE_NEG_X,
706 PIPE_TEX_FACE_POS_Y, PIPE_TEX_FACE_NEG_Y }
707 };
708
709 static INLINE unsigned
710 get_next_face(unsigned face, int idx)
711 {
712 return face_array[face][idx];
713 }
714
/*
 * Return the x coord on the neighbouring cube face, based on the old face
 * (numeric index 0..5), the old coords, max (cube size - 1) and
 * fall_off_index (0 for x-, 1 for x+, 2 for y-, 3 for y+).
 * Any combination not listed explicitly yields max - yc.
 */
static INLINE int
get_next_xcoord(unsigned face, unsigned fall_off_index, int max, int xc, int yc)
{
   switch (face) {
   case 0:
      /* only falling off x+ lands on column 0 */
      return (fall_off_index == 1) ? 0 : max;

   case 1:
      /* only falling off x- lands on column max */
      return (fall_off_index == 0) ? max : 0;

   case 2:
      switch (fall_off_index) {
      case 0:
         return yc;
      case 2:
         return max - xc;
      case 3:
         return xc;
      default:
         return max - yc;
      }

   case 3:
      switch (fall_off_index) {
      case 1:
         return yc;
      case 2:
         return xc;
      case 3:
         return max - xc;
      default:
         return max - yc;
      }

   case 4:
      if (fall_off_index == 0) {
         return max;
      }
      if (fall_off_index == 1) {
         return 0;
      }
      return xc;

   case 5:
      if (fall_off_index == 0) {
         return max;
      }
      if (fall_off_index == 1) {
         return 0;
      }
      return max - xc;

   default:
      return max - yc;
   }
}
752
/*
 * Return the y coord on the neighbouring cube face, based on the old face
 * (numeric index 0..5), the old coords, max (cube size - 1) and
 * fall_off_index (0 for x-, 1 for x+, 2 for y-, 3 for y+).
 * Any combination not listed explicitly yields max - xc.
 */
static INLINE int
get_next_ycoord(unsigned face, unsigned fall_off_index, int max, int xc, int yc)
{
   /* falling off a left/right edge (x- or x+) */
   const int off_side = (fall_off_index <= 1);

   switch (face) {
   case 2:
      return 0;

   case 3:
      return max;

   case 0:
      if (off_side) {
         return yc;
      }
      return (fall_off_index == 3) ? xc : max - xc;

   case 1:
      if (off_side) {
         return yc;
      }
      return (fall_off_index == 2) ? xc : max - xc;

   case 4:
      if (off_side) {
         return yc;
      }
      if (fall_off_index == 3) {
         return 0;
      }
      if (fall_off_index == 2) {
         return max;
      }
      return max - xc;

   case 5:
      if (off_side) {
         return yc;
      }
      if (fall_off_index == 2) {
         return 0;
      }
      if (fall_off_index == 3) {
         return max;
      }
      return max - xc;

   default:
      return off_side ? yc : max - xc;
   }
}
781
782
783 /* Gather a quad of adjacent texels within a tile:
784 */
785 static INLINE void
786 get_texel_quad_2d_no_border_single_tile(const struct sp_sampler_view *sp_sview,
787 union tex_tile_address addr,
788 unsigned x, unsigned y,
789 const float *out[4])
790 {
791 const struct softpipe_tex_cached_tile *tile;
792
793 addr.bits.x = x / TEX_TILE_SIZE;
794 addr.bits.y = y / TEX_TILE_SIZE;
795 y %= TEX_TILE_SIZE;
796 x %= TEX_TILE_SIZE;
797
798 tile = sp_get_cached_tile_tex(sp_sview->cache, addr);
799
800 out[0] = &tile->data.color[y ][x ][0];
801 out[1] = &tile->data.color[y ][x+1][0];
802 out[2] = &tile->data.color[y+1][x ][0];
803 out[3] = &tile->data.color[y+1][x+1][0];
804 }
805
806
807 /* Gather a quad of potentially non-adjacent texels:
808 */
809 static INLINE void
810 get_texel_quad_2d_no_border(const struct sp_sampler_view *sp_sview,
811 union tex_tile_address addr,
812 int x0, int y0,
813 int x1, int y1,
814 const float *out[4])
815 {
816 out[0] = get_texel_2d_no_border( sp_sview, addr, x0, y0 );
817 out[1] = get_texel_2d_no_border( sp_sview, addr, x1, y0 );
818 out[2] = get_texel_2d_no_border( sp_sview, addr, x0, y1 );
819 out[3] = get_texel_2d_no_border( sp_sview, addr, x1, y1 );
820 }
821
822 /* Can involve a lot of unnecessary checks for border color:
823 */
824 static INLINE void
825 get_texel_quad_2d(const struct sp_sampler_view *sp_sview,
826 const struct sp_sampler *sp_samp,
827 union tex_tile_address addr,
828 int x0, int y0,
829 int x1, int y1,
830 const float *out[4])
831 {
832 out[0] = get_texel_2d( sp_sview, sp_samp, addr, x0, y0 );
833 out[1] = get_texel_2d( sp_sview, sp_samp, addr, x1, y0 );
834 out[3] = get_texel_2d( sp_sview, sp_samp, addr, x1, y1 );
835 out[2] = get_texel_2d( sp_sview, sp_samp, addr, x0, y1 );
836 }
837
838
839
840 /* 3d variants:
841 */
842 static INLINE const float *
843 get_texel_3d_no_border(const struct sp_sampler_view *sp_sview,
844 union tex_tile_address addr, int x, int y, int z)
845 {
846 const struct softpipe_tex_cached_tile *tile;
847
848 addr.bits.x = x / TEX_TILE_SIZE;
849 addr.bits.y = y / TEX_TILE_SIZE;
850 addr.bits.z = z;
851 y %= TEX_TILE_SIZE;
852 x %= TEX_TILE_SIZE;
853
854 tile = sp_get_cached_tile_tex(sp_sview->cache, addr);
855
856 return &tile->data.color[y][x][0];
857 }
858
859
860 static INLINE const float *
861 get_texel_3d(const struct sp_sampler_view *sp_sview,
862 const struct sp_sampler *sp_samp,
863 union tex_tile_address addr, int x, int y, int z)
864 {
865 const struct pipe_resource *texture = sp_sview->base.texture;
866 unsigned level = addr.bits.level;
867
868 if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
869 y < 0 || y >= (int) u_minify(texture->height0, level) ||
870 z < 0 || z >= (int) u_minify(texture->depth0, level)) {
871 return sp_samp->base.border_color.f;
872 }
873 else {
874 return get_texel_3d_no_border( sp_sview, addr, x, y, z );
875 }
876 }
877
878
879 /* Get texel pointer for 1D array texture */
880 static INLINE const float *
881 get_texel_1d_array(const struct sp_sampler_view *sp_sview,
882 const struct sp_sampler *sp_samp,
883 union tex_tile_address addr, int x, int y)
884 {
885 const struct pipe_resource *texture = sp_sview->base.texture;
886 unsigned level = addr.bits.level;
887
888 if (x < 0 || x >= (int) u_minify(texture->width0, level)) {
889 return sp_samp->base.border_color.f;
890 }
891 else {
892 return get_texel_2d_no_border(sp_sview, addr, x, y);
893 }
894 }
895
896
897 /* Get texel pointer for 2D array texture */
898 static INLINE const float *
899 get_texel_2d_array(const struct sp_sampler_view *sp_sview,
900 const struct sp_sampler *sp_samp,
901 union tex_tile_address addr, int x, int y, int layer)
902 {
903 const struct pipe_resource *texture = sp_sview->base.texture;
904 unsigned level = addr.bits.level;
905
906 assert(layer < (int) texture->array_size);
907 assert(layer >= 0);
908
909 if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
910 y < 0 || y >= (int) u_minify(texture->height0, level)) {
911 return sp_samp->base.border_color.f;
912 }
913 else {
914 return get_texel_3d_no_border(sp_sview, addr, x, y, layer);
915 }
916 }
917
918
919 static INLINE const float *
920 get_texel_cube_seamless(const struct sp_sampler_view *sp_sview,
921 union tex_tile_address addr, int x, int y,
922 float *corner, int layer, unsigned face)
923 {
924 const struct pipe_resource *texture = sp_sview->base.texture;
925 unsigned level = addr.bits.level;
926 int new_x, new_y, max_x;
927
928 max_x = (int) u_minify(texture->width0, level);
929
930 assert(texture->width0 == texture->height0);
931 new_x = x;
932 new_y = y;
933
934 /* change the face */
935 if (x < 0) {
936 /*
937 * Cheat with corners. They are difficult and I believe because we don't get
938 * per-pixel faces we can actually have multiple corner texels per pixel,
939 * which screws things up majorly in any case (as the per spec behavior is
940 * to average the 3 remaining texels, which we might not have).
941 * Hence just make sure that the 2nd coord is clamped, will simply pick the
942 * sample which would have fallen off the x coord, but not y coord.
943 * So the filter weight of the samples will be wrong, but at least this
944 * ensures that only valid texels near the corner are used.
945 */
946 if (y < 0 || y >= max_x) {
947 y = CLAMP(y, 0, max_x - 1);
948 }
949 new_x = get_next_xcoord(face, 0, max_x -1, x, y);
950 new_y = get_next_ycoord(face, 0, max_x -1, x, y);
951 face = get_next_face(face, 0);
952 } else if (x >= max_x) {
953 if (y < 0 || y >= max_x) {
954 y = CLAMP(y, 0, max_x - 1);
955 }
956 new_x = get_next_xcoord(face, 1, max_x -1, x, y);
957 new_y = get_next_ycoord(face, 1, max_x -1, x, y);
958 face = get_next_face(face, 1);
959 } else if (y < 0) {
960 new_x = get_next_xcoord(face, 2, max_x -1, x, y);
961 new_y = get_next_ycoord(face, 2, max_x -1, x, y);
962 face = get_next_face(face, 2);
963 } else if (y >= max_x) {
964 new_x = get_next_xcoord(face, 3, max_x -1, x, y);
965 new_y = get_next_ycoord(face, 3, max_x -1, x, y);
966 face = get_next_face(face, 3);
967 }
968
969 return get_texel_3d_no_border(sp_sview, addr, new_x, new_y, layer + face);
970 }
971
972
973 /* Get texel pointer for cube array texture */
974 static INLINE const float *
975 get_texel_cube_array(const struct sp_sampler_view *sp_sview,
976 const struct sp_sampler *sp_samp,
977 union tex_tile_address addr, int x, int y, int layer)
978 {
979 const struct pipe_resource *texture = sp_sview->base.texture;
980 unsigned level = addr.bits.level;
981
982 assert(layer < (int) texture->array_size);
983 assert(layer >= 0);
984
985 if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
986 y < 0 || y >= (int) u_minify(texture->height0, level)) {
987 return sp_samp->base.border_color.f;
988 }
989 else {
990 return get_texel_3d_no_border(sp_sview, addr, x, y, layer);
991 }
992 }
/**
 * Given the logbase2 of a mipmap's base level size and a mipmap level,
 * return the size (in texels) of that mipmap level, i.e.
 * 2^(base_pot - level).
 * For example, if level[0].width = 256 then base_pot will be 8.
 * If level = 2, then we'll return 64 (the width at level=2).
 * Return 1 if level > base_pot.
 */
static INLINE unsigned
pot_level_size(unsigned base_pot, unsigned level)
{
   if (level > base_pot) {
      return 1;
   }

   return 1 << (base_pot - level);
}
1005
1006
1007 static void
1008 print_sample(const char *function, const float *rgba)
1009 {
1010 debug_printf("%s %g %g %g %g\n",
1011 function,
1012 rgba[0], rgba[TGSI_NUM_CHANNELS], rgba[2*TGSI_NUM_CHANNELS], rgba[3*TGSI_NUM_CHANNELS]);
1013 }
1014
1015
1016 static void
1017 print_sample_4(const char *function, float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1018 {
1019 debug_printf("%s %g %g %g %g, %g %g %g %g, %g %g %g %g, %g %g %g %g\n",
1020 function,
1021 rgba[0][0], rgba[1][0], rgba[2][0], rgba[3][0],
1022 rgba[0][1], rgba[1][1], rgba[2][1], rgba[3][1],
1023 rgba[0][2], rgba[1][2], rgba[2][2], rgba[3][2],
1024 rgba[0][3], rgba[1][3], rgba[2][3], rgba[3][3]);
1025 }
1026
1027
1028 /* Some image-filter fastpaths:
1029 */
1030 static INLINE void
1031 img_filter_2d_linear_repeat_POT(struct sp_sampler_view *sp_sview,
1032 struct sp_sampler *sp_samp,
1033 const struct img_filter_args *args,
1034 float *rgba)
1035 {
1036 unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
1037 unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
1038 int xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */
1039 int ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */
1040 union tex_tile_address addr;
1041 int c;
1042
1043 float u = args->s * xpot - 0.5F;
1044 float v = args->t * ypot - 0.5F;
1045
1046 int uflr = util_ifloor(u);
1047 int vflr = util_ifloor(v);
1048
1049 float xw = u - (float)uflr;
1050 float yw = v - (float)vflr;
1051
1052 int x0 = uflr & (xpot - 1);
1053 int y0 = vflr & (ypot - 1);
1054
1055 const float *tx[4];
1056
1057 addr.value = 0;
1058 addr.bits.level = args->level;
1059
1060 /* Can we fetch all four at once:
1061 */
1062 if (x0 < xmax && y0 < ymax) {
1063 get_texel_quad_2d_no_border_single_tile(sp_sview, addr, x0, y0, tx);
1064 }
1065 else {
1066 unsigned x1 = (x0 + 1) & (xpot - 1);
1067 unsigned y1 = (y0 + 1) & (ypot - 1);
1068 get_texel_quad_2d_no_border(sp_sview, addr, x0, y0, x1, y1, tx);
1069 }
1070
1071 /* interpolate R, G, B, A */
1072 for (c = 0; c < TGSI_QUAD_SIZE; c++) {
1073 rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1074 tx[0][c], tx[1][c],
1075 tx[2][c], tx[3][c]);
1076 }
1077
1078 if (DEBUG_TEX) {
1079 print_sample(__FUNCTION__, rgba);
1080 }
1081 }
1082
1083
1084 static INLINE void
1085 img_filter_2d_nearest_repeat_POT(struct sp_sampler_view *sp_sview,
1086 struct sp_sampler *sp_samp,
1087 const struct img_filter_args *args,
1088 float rgba[TGSI_QUAD_SIZE])
1089 {
1090 unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
1091 unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
1092 const float *out;
1093 union tex_tile_address addr;
1094 int c;
1095
1096 float u = args->s * xpot;
1097 float v = args->t * ypot;
1098
1099 int uflr = util_ifloor(u);
1100 int vflr = util_ifloor(v);
1101
1102 int x0 = uflr & (xpot - 1);
1103 int y0 = vflr & (ypot - 1);
1104
1105 addr.value = 0;
1106 addr.bits.level = args->level;
1107
1108 out = get_texel_2d_no_border(sp_sview, addr, x0, y0);
1109 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1110 rgba[TGSI_NUM_CHANNELS*c] = out[c];
1111
1112 if (DEBUG_TEX) {
1113 print_sample(__FUNCTION__, rgba);
1114 }
1115 }
1116
1117
1118 static INLINE void
1119 img_filter_2d_nearest_clamp_POT(struct sp_sampler_view *sp_sview,
1120 struct sp_sampler *sp_samp,
1121 const struct img_filter_args *args,
1122 float rgba[TGSI_QUAD_SIZE])
1123 {
1124 unsigned xpot = pot_level_size(sp_sview->xpot, args->level);
1125 unsigned ypot = pot_level_size(sp_sview->ypot, args->level);
1126 union tex_tile_address addr;
1127 int c;
1128
1129 float u = args->s * xpot;
1130 float v = args->t * ypot;
1131
1132 int x0, y0;
1133 const float *out;
1134
1135 addr.value = 0;
1136 addr.bits.level = args->level;
1137
1138 x0 = util_ifloor(u);
1139 if (x0 < 0)
1140 x0 = 0;
1141 else if (x0 > (int) xpot - 1)
1142 x0 = xpot - 1;
1143
1144 y0 = util_ifloor(v);
1145 if (y0 < 0)
1146 y0 = 0;
1147 else if (y0 > (int) ypot - 1)
1148 y0 = ypot - 1;
1149
1150 out = get_texel_2d_no_border(sp_sview, addr, x0, y0);
1151 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1152 rgba[TGSI_NUM_CHANNELS*c] = out[c];
1153
1154 if (DEBUG_TEX) {
1155 print_sample(__FUNCTION__, rgba);
1156 }
1157 }
1158
1159
1160 static void
1161 img_filter_1d_nearest(struct sp_sampler_view *sp_sview,
1162 struct sp_sampler *sp_samp,
1163 const struct img_filter_args *args,
1164 float rgba[TGSI_QUAD_SIZE])
1165 {
1166 const struct pipe_resource *texture = sp_sview->base.texture;
1167 int width;
1168 int x;
1169 union tex_tile_address addr;
1170 const float *out;
1171 int c;
1172
1173 width = u_minify(texture->width0, args->level);
1174
1175 assert(width > 0);
1176
1177 addr.value = 0;
1178 addr.bits.level = args->level;
1179
1180 sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
1181
1182 out = get_texel_2d(sp_sview, sp_samp, addr, x, 0);
1183 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1184 rgba[TGSI_NUM_CHANNELS*c] = out[c];
1185
1186 if (DEBUG_TEX) {
1187 print_sample(__FUNCTION__, rgba);
1188 }
1189 }
1190
1191
1192 static void
1193 img_filter_1d_array_nearest(struct sp_sampler_view *sp_sview,
1194 struct sp_sampler *sp_samp,
1195 const struct img_filter_args *args,
1196 float *rgba)
1197 {
1198 const struct pipe_resource *texture = sp_sview->base.texture;
1199 int width;
1200 int x, layer;
1201 union tex_tile_address addr;
1202 const float *out;
1203 int c;
1204
1205 width = u_minify(texture->width0, args->level);
1206
1207 assert(width > 0);
1208
1209 addr.value = 0;
1210 addr.bits.level = args->level;
1211
1212 sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
1213 layer = coord_to_layer(args->t, sp_sview->base.u.tex.first_layer,
1214 sp_sview->base.u.tex.last_layer);
1215
1216 out = get_texel_1d_array(sp_sview, sp_samp, addr, x, layer);
1217 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1218 rgba[TGSI_NUM_CHANNELS*c] = out[c];
1219
1220 if (DEBUG_TEX) {
1221 print_sample(__FUNCTION__, rgba);
1222 }
1223 }
1224
1225
1226 static void
1227 img_filter_2d_nearest(struct sp_sampler_view *sp_sview,
1228 struct sp_sampler *sp_samp,
1229 const struct img_filter_args *args,
1230 float *rgba)
1231 {
1232 const struct pipe_resource *texture = sp_sview->base.texture;
1233 int width, height;
1234 int x, y;
1235 union tex_tile_address addr;
1236 const float *out;
1237 int c;
1238
1239 width = u_minify(texture->width0, args->level);
1240 height = u_minify(texture->height0, args->level);
1241
1242 assert(width > 0);
1243 assert(height > 0);
1244
1245 addr.value = 0;
1246 addr.bits.level = args->level;
1247
1248 sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
1249 sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
1250
1251 out = get_texel_2d(sp_sview, sp_samp, addr, x, y);
1252 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1253 rgba[TGSI_NUM_CHANNELS*c] = out[c];
1254
1255 if (DEBUG_TEX) {
1256 print_sample(__FUNCTION__, rgba);
1257 }
1258 }
1259
1260
1261 static void
1262 img_filter_2d_array_nearest(struct sp_sampler_view *sp_sview,
1263 struct sp_sampler *sp_samp,
1264 const struct img_filter_args *args,
1265 float *rgba)
1266 {
1267 const struct pipe_resource *texture = sp_sview->base.texture;
1268 int width, height;
1269 int x, y, layer;
1270 union tex_tile_address addr;
1271 const float *out;
1272 int c;
1273
1274 width = u_minify(texture->width0, args->level);
1275 height = u_minify(texture->height0, args->level);
1276
1277 assert(width > 0);
1278 assert(height > 0);
1279
1280 addr.value = 0;
1281 addr.bits.level = args->level;
1282
1283 sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
1284 sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
1285 layer = coord_to_layer(args->p, sp_sview->base.u.tex.first_layer,
1286 sp_sview->base.u.tex.last_layer);
1287
1288 out = get_texel_2d_array(sp_sview, sp_samp, addr, x, y, layer);
1289 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1290 rgba[TGSI_NUM_CHANNELS*c] = out[c];
1291
1292 if (DEBUG_TEX) {
1293 print_sample(__FUNCTION__, rgba);
1294 }
1295 }
1296
1297
1298 static void
1299 img_filter_cube_nearest(struct sp_sampler_view *sp_sview,
1300 struct sp_sampler *sp_samp,
1301 const struct img_filter_args *args,
1302 float *rgba)
1303 {
1304 const struct pipe_resource *texture = sp_sview->base.texture;
1305 int width, height;
1306 int x, y, layerface;
1307 union tex_tile_address addr;
1308 const float *out;
1309 int c;
1310
1311 width = u_minify(texture->width0, args->level);
1312 height = u_minify(texture->height0, args->level);
1313
1314 assert(width > 0);
1315 assert(height > 0);
1316
1317 addr.value = 0;
1318 addr.bits.level = args->level;
1319
1320 /*
1321 * If NEAREST filtering is done within a miplevel, always apply wrap
1322 * mode CLAMP_TO_EDGE.
1323 */
1324 if (sp_samp->base.seamless_cube_map) {
1325 wrap_nearest_clamp_to_edge(args->s, width, args->offset[0], &x);
1326 wrap_nearest_clamp_to_edge(args->t, height, args->offset[1], &y);
1327 } else {
1328 /* Would probably make sense to ignore mode and just do edge clamp */
1329 sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
1330 sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
1331 }
1332
1333 layerface = args->face_id + sp_sview->base.u.tex.first_layer;
1334 out = get_texel_cube_array(sp_sview, sp_samp, addr, x, y, layerface);
1335 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1336 rgba[TGSI_NUM_CHANNELS*c] = out[c];
1337
1338 if (DEBUG_TEX) {
1339 print_sample(__FUNCTION__, rgba);
1340 }
1341 }
1342
1343 static void
1344 img_filter_cube_array_nearest(struct sp_sampler_view *sp_sview,
1345 struct sp_sampler *sp_samp,
1346 const struct img_filter_args *args,
1347 float *rgba)
1348 {
1349 const struct pipe_resource *texture = sp_sview->base.texture;
1350 int width, height;
1351 int x, y, layerface;
1352 union tex_tile_address addr;
1353 const float *out;
1354 int c;
1355
1356 width = u_minify(texture->width0, args->level);
1357 height = u_minify(texture->height0, args->level);
1358
1359 assert(width > 0);
1360 assert(height > 0);
1361
1362 addr.value = 0;
1363 addr.bits.level = args->level;
1364
1365 sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
1366 sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
1367 layerface = coord_to_layer(6 * args->p + sp_sview->base.u.tex.first_layer,
1368 sp_sview->base.u.tex.first_layer,
1369 sp_sview->base.u.tex.last_layer - 5) + args->face_id;
1370
1371 out = get_texel_cube_array(sp_sview, sp_samp, addr, x, y, layerface);
1372 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1373 rgba[TGSI_NUM_CHANNELS*c] = out[c];
1374
1375 if (DEBUG_TEX) {
1376 print_sample(__FUNCTION__, rgba);
1377 }
1378 }
1379
1380 static void
1381 img_filter_3d_nearest(struct sp_sampler_view *sp_sview,
1382 struct sp_sampler *sp_samp,
1383 const struct img_filter_args *args,
1384 float *rgba)
1385 {
1386 const struct pipe_resource *texture = sp_sview->base.texture;
1387 int width, height, depth;
1388 int x, y, z;
1389 union tex_tile_address addr;
1390 const float *out;
1391 int c;
1392
1393 width = u_minify(texture->width0, args->level);
1394 height = u_minify(texture->height0, args->level);
1395 depth = u_minify(texture->depth0, args->level);
1396
1397 assert(width > 0);
1398 assert(height > 0);
1399 assert(depth > 0);
1400
1401 sp_samp->nearest_texcoord_s(args->s, width, args->offset[0], &x);
1402 sp_samp->nearest_texcoord_t(args->t, height, args->offset[1], &y);
1403 sp_samp->nearest_texcoord_p(args->p, depth, args->offset[2], &z);
1404
1405 addr.value = 0;
1406 addr.bits.level = args->level;
1407
1408 out = get_texel_3d(sp_sview, sp_samp, addr, x, y, z);
1409 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1410 rgba[TGSI_NUM_CHANNELS*c] = out[c];
1411 }
1412
1413
1414 static void
1415 img_filter_1d_linear(struct sp_sampler_view *sp_sview,
1416 struct sp_sampler *sp_samp,
1417 const struct img_filter_args *args,
1418 float *rgba)
1419 {
1420 const struct pipe_resource *texture = sp_sview->base.texture;
1421 int width;
1422 int x0, x1;
1423 float xw; /* weights */
1424 union tex_tile_address addr;
1425 const float *tx0, *tx1;
1426 int c;
1427
1428 width = u_minify(texture->width0, args->level);
1429
1430 assert(width > 0);
1431
1432 addr.value = 0;
1433 addr.bits.level = args->level;
1434
1435 sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw);
1436
1437 tx0 = get_texel_2d(sp_sview, sp_samp, addr, x0, 0);
1438 tx1 = get_texel_2d(sp_sview, sp_samp, addr, x1, 0);
1439
1440 /* interpolate R, G, B, A */
1441 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1442 rgba[TGSI_NUM_CHANNELS*c] = lerp(xw, tx0[c], tx1[c]);
1443 }
1444
1445
1446 static void
1447 img_filter_1d_array_linear(struct sp_sampler_view *sp_sview,
1448 struct sp_sampler *sp_samp,
1449 const struct img_filter_args *args,
1450 float *rgba)
1451 {
1452 const struct pipe_resource *texture = sp_sview->base.texture;
1453 int width;
1454 int x0, x1, layer;
1455 float xw; /* weights */
1456 union tex_tile_address addr;
1457 const float *tx0, *tx1;
1458 int c;
1459
1460 width = u_minify(texture->width0, args->level);
1461
1462 assert(width > 0);
1463
1464 addr.value = 0;
1465 addr.bits.level = args->level;
1466
1467 sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw);
1468 layer = coord_to_layer(args->t, sp_sview->base.u.tex.first_layer,
1469 sp_sview->base.u.tex.last_layer);
1470
1471 tx0 = get_texel_1d_array(sp_sview, sp_samp, addr, x0, layer);
1472 tx1 = get_texel_1d_array(sp_sview, sp_samp, addr, x1, layer);
1473
1474 /* interpolate R, G, B, A */
1475 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1476 rgba[TGSI_NUM_CHANNELS*c] = lerp(xw, tx0[c], tx1[c]);
1477 }
1478
1479 /*
1480 * Retrieve the gathered value, need to convert to the
1481 * TGSI expected interface, and take component select
1482 * and swizzling into account.
1483 */
1484 static float
1485 get_gather_value(const struct sp_sampler_view *sp_sview,
1486 int chan_in, int comp_sel,
1487 const float *tx[4])
1488 {
1489 int chan;
1490 unsigned swizzle;
1491
1492 /*
1493 * softpipe samples in a different order
1494 * to TGSI expects, so we need to swizzle,
1495 * the samples into the correct slots.
1496 */
1497 switch (chan_in) {
1498 case 0:
1499 chan = 2;
1500 break;
1501 case 1:
1502 chan = 3;
1503 break;
1504 case 2:
1505 chan = 1;
1506 break;
1507 case 3:
1508 chan = 0;
1509 break;
1510 default:
1511 assert(0);
1512 return 0.0;
1513 }
1514
1515 /* pick which component to use for the swizzle */
1516 switch (comp_sel) {
1517 case 0:
1518 swizzle = sp_sview->base.swizzle_r;
1519 break;
1520 case 1:
1521 swizzle = sp_sview->base.swizzle_g;
1522 break;
1523 case 2:
1524 swizzle = sp_sview->base.swizzle_b;
1525 break;
1526 case 3:
1527 swizzle = sp_sview->base.swizzle_a;
1528 break;
1529 default:
1530 assert(0);
1531 return 0.0;
1532 }
1533
1534 /* get correct result using the channel and swizzle */
1535 switch (swizzle) {
1536 case PIPE_SWIZZLE_ZERO:
1537 return 0.0;
1538 case PIPE_SWIZZLE_ONE:
1539 return 1.0;
1540 default:
1541 return tx[chan][swizzle];
1542 }
1543 }
1544
1545
1546 static void
1547 img_filter_2d_linear(struct sp_sampler_view *sp_sview,
1548 struct sp_sampler *sp_samp,
1549 const struct img_filter_args *args,
1550 float *rgba)
1551 {
1552 const struct pipe_resource *texture = sp_sview->base.texture;
1553 int width, height;
1554 int x0, y0, x1, y1;
1555 float xw, yw; /* weights */
1556 union tex_tile_address addr;
1557 const float *tx[4];
1558 int c;
1559
1560 width = u_minify(texture->width0, args->level);
1561 height = u_minify(texture->height0, args->level);
1562
1563 assert(width > 0);
1564 assert(height > 0);
1565
1566 addr.value = 0;
1567 addr.bits.level = args->level;
1568
1569 sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw);
1570 sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
1571
1572 tx[0] = get_texel_2d(sp_sview, sp_samp, addr, x0, y0);
1573 tx[1] = get_texel_2d(sp_sview, sp_samp, addr, x1, y0);
1574 tx[2] = get_texel_2d(sp_sview, sp_samp, addr, x0, y1);
1575 tx[3] = get_texel_2d(sp_sview, sp_samp, addr, x1, y1);
1576
1577 if (args->gather_only) {
1578 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1579 rgba[TGSI_NUM_CHANNELS*c] = get_gather_value(sp_sview, c,
1580 args->gather_comp,
1581 tx);
1582 } else {
1583 /* interpolate R, G, B, A */
1584 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1585 rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1586 tx[0][c], tx[1][c],
1587 tx[2][c], tx[3][c]);
1588 }
1589 }
1590
1591
1592 static void
1593 img_filter_2d_array_linear(struct sp_sampler_view *sp_sview,
1594 struct sp_sampler *sp_samp,
1595 const struct img_filter_args *args,
1596 float *rgba)
1597 {
1598 const struct pipe_resource *texture = sp_sview->base.texture;
1599 int width, height;
1600 int x0, y0, x1, y1, layer;
1601 float xw, yw; /* weights */
1602 union tex_tile_address addr;
1603 const float *tx[4];
1604 int c;
1605
1606 width = u_minify(texture->width0, args->level);
1607 height = u_minify(texture->height0, args->level);
1608
1609 assert(width > 0);
1610 assert(height > 0);
1611
1612 addr.value = 0;
1613 addr.bits.level = args->level;
1614
1615 sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw);
1616 sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
1617 layer = coord_to_layer(args->p, sp_sview->base.u.tex.first_layer,
1618 sp_sview->base.u.tex.last_layer);
1619
1620 tx[0] = get_texel_2d_array(sp_sview, sp_samp, addr, x0, y0, layer);
1621 tx[1] = get_texel_2d_array(sp_sview, sp_samp, addr, x1, y0, layer);
1622 tx[2] = get_texel_2d_array(sp_sview, sp_samp, addr, x0, y1, layer);
1623 tx[3] = get_texel_2d_array(sp_sview, sp_samp, addr, x1, y1, layer);
1624
1625 if (args->gather_only) {
1626 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1627 rgba[TGSI_NUM_CHANNELS*c] = get_gather_value(sp_sview, c,
1628 args->gather_comp,
1629 tx);
1630 } else {
1631 /* interpolate R, G, B, A */
1632 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1633 rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1634 tx[0][c], tx[1][c],
1635 tx[2][c], tx[3][c]);
1636 }
1637 }
1638
1639
1640 static void
1641 img_filter_cube_linear(struct sp_sampler_view *sp_sview,
1642 struct sp_sampler *sp_samp,
1643 const struct img_filter_args *args,
1644 float *rgba)
1645 {
1646 const struct pipe_resource *texture = sp_sview->base.texture;
1647 int width, height;
1648 int x0, y0, x1, y1, layer;
1649 float xw, yw; /* weights */
1650 union tex_tile_address addr;
1651 const float *tx[4];
1652 float corner0[TGSI_QUAD_SIZE], corner1[TGSI_QUAD_SIZE],
1653 corner2[TGSI_QUAD_SIZE], corner3[TGSI_QUAD_SIZE];
1654 int c;
1655
1656 width = u_minify(texture->width0, args->level);
1657 height = u_minify(texture->height0, args->level);
1658
1659 assert(width > 0);
1660 assert(height > 0);
1661
1662 addr.value = 0;
1663 addr.bits.level = args->level;
1664
1665 /*
1666 * For seamless if LINEAR filtering is done within a miplevel,
1667 * always apply wrap mode CLAMP_TO_BORDER.
1668 */
1669 if (sp_samp->base.seamless_cube_map) {
1670 /* Note this is a bit overkill, actual clamping is not required */
1671 wrap_linear_clamp_to_border(args->s, width, args->offset[0], &x0, &x1, &xw);
1672 wrap_linear_clamp_to_border(args->t, height, args->offset[1], &y0, &y1, &yw);
1673 } else {
1674 /* Would probably make sense to ignore mode and just do edge clamp */
1675 sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw);
1676 sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
1677 }
1678
1679 layer = sp_sview->base.u.tex.first_layer;
1680
1681 if (sp_samp->base.seamless_cube_map) {
1682 tx[0] = get_texel_cube_seamless(sp_sview, addr, x0, y0, corner0, layer, args->face_id);
1683 tx[1] = get_texel_cube_seamless(sp_sview, addr, x1, y0, corner1, layer, args->face_id);
1684 tx[2] = get_texel_cube_seamless(sp_sview, addr, x0, y1, corner2, layer, args->face_id);
1685 tx[3] = get_texel_cube_seamless(sp_sview, addr, x1, y1, corner3, layer, args->face_id);
1686 } else {
1687 tx[0] = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y0, layer + args->face_id);
1688 tx[1] = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y0, layer + args->face_id);
1689 tx[2] = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y1, layer + args->face_id);
1690 tx[3] = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer + args->face_id);
1691 }
1692
1693 if (args->gather_only) {
1694 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1695 rgba[TGSI_NUM_CHANNELS*c] = get_gather_value(sp_sview, c,
1696 args->gather_comp,
1697 tx);
1698 } else {
1699 /* interpolate R, G, B, A */
1700 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1701 rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1702 tx[0][c], tx[1][c],
1703 tx[2][c], tx[3][c]);
1704 }
1705 }
1706
1707
1708 static void
1709 img_filter_cube_array_linear(struct sp_sampler_view *sp_sview,
1710 struct sp_sampler *sp_samp,
1711 const struct img_filter_args *args,
1712 float *rgba)
1713 {
1714 const struct pipe_resource *texture = sp_sview->base.texture;
1715 int width, height;
1716 int x0, y0, x1, y1, layer;
1717 float xw, yw; /* weights */
1718 union tex_tile_address addr;
1719 const float *tx[4];
1720 float corner0[TGSI_QUAD_SIZE], corner1[TGSI_QUAD_SIZE],
1721 corner2[TGSI_QUAD_SIZE], corner3[TGSI_QUAD_SIZE];
1722 int c;
1723
1724 width = u_minify(texture->width0, args->level);
1725 height = u_minify(texture->height0, args->level);
1726
1727 assert(width > 0);
1728 assert(height > 0);
1729
1730 addr.value = 0;
1731 addr.bits.level = args->level;
1732
1733 /*
1734 * For seamless if LINEAR filtering is done within a miplevel,
1735 * always apply wrap mode CLAMP_TO_BORDER.
1736 */
1737 if (sp_samp->base.seamless_cube_map) {
1738 /* Note this is a bit overkill, actual clamping is not required */
1739 wrap_linear_clamp_to_border(args->s, width, args->offset[0], &x0, &x1, &xw);
1740 wrap_linear_clamp_to_border(args->t, height, args->offset[1], &y0, &y1, &yw);
1741 } else {
1742 /* Would probably make sense to ignore mode and just do edge clamp */
1743 sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw);
1744 sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
1745 }
1746
1747 layer = coord_to_layer(6 * args->p + sp_sview->base.u.tex.first_layer,
1748 sp_sview->base.u.tex.first_layer,
1749 sp_sview->base.u.tex.last_layer - 5);
1750
1751 if (sp_samp->base.seamless_cube_map) {
1752 tx[0] = get_texel_cube_seamless(sp_sview, addr, x0, y0, corner0, layer, args->face_id);
1753 tx[1] = get_texel_cube_seamless(sp_sview, addr, x1, y0, corner1, layer, args->face_id);
1754 tx[2] = get_texel_cube_seamless(sp_sview, addr, x0, y1, corner2, layer, args->face_id);
1755 tx[3] = get_texel_cube_seamless(sp_sview, addr, x1, y1, corner3, layer, args->face_id);
1756 } else {
1757 tx[0] = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y0, layer + args->face_id);
1758 tx[1] = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y0, layer + args->face_id);
1759 tx[2] = get_texel_cube_array(sp_sview, sp_samp, addr, x0, y1, layer + args->face_id);
1760 tx[3] = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer + args->face_id);
1761 }
1762
1763 if (args->gather_only) {
1764 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1765 rgba[TGSI_NUM_CHANNELS*c] = get_gather_value(sp_sview, c,
1766 args->gather_comp,
1767 tx);
1768 } else {
1769 /* interpolate R, G, B, A */
1770 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1771 rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
1772 tx[0][c], tx[1][c],
1773 tx[2][c], tx[3][c]);
1774 }
1775 }
1776
1777 static void
1778 img_filter_3d_linear(struct sp_sampler_view *sp_sview,
1779 struct sp_sampler *sp_samp,
1780 const struct img_filter_args *args,
1781 float *rgba)
1782 {
1783 const struct pipe_resource *texture = sp_sview->base.texture;
1784 int width, height, depth;
1785 int x0, x1, y0, y1, z0, z1;
1786 float xw, yw, zw; /* interpolation weights */
1787 union tex_tile_address addr;
1788 const float *tx00, *tx01, *tx02, *tx03, *tx10, *tx11, *tx12, *tx13;
1789 int c;
1790
1791 width = u_minify(texture->width0, args->level);
1792 height = u_minify(texture->height0, args->level);
1793 depth = u_minify(texture->depth0, args->level);
1794
1795 addr.value = 0;
1796 addr.bits.level = args->level;
1797
1798 assert(width > 0);
1799 assert(height > 0);
1800 assert(depth > 0);
1801
1802 sp_samp->linear_texcoord_s(args->s, width, args->offset[0], &x0, &x1, &xw);
1803 sp_samp->linear_texcoord_t(args->t, height, args->offset[1], &y0, &y1, &yw);
1804 sp_samp->linear_texcoord_p(args->p, depth, args->offset[2], &z0, &z1, &zw);
1805
1806 tx00 = get_texel_3d(sp_sview, sp_samp, addr, x0, y0, z0);
1807 tx01 = get_texel_3d(sp_sview, sp_samp, addr, x1, y0, z0);
1808 tx02 = get_texel_3d(sp_sview, sp_samp, addr, x0, y1, z0);
1809 tx03 = get_texel_3d(sp_sview, sp_samp, addr, x1, y1, z0);
1810
1811 tx10 = get_texel_3d(sp_sview, sp_samp, addr, x0, y0, z1);
1812 tx11 = get_texel_3d(sp_sview, sp_samp, addr, x1, y0, z1);
1813 tx12 = get_texel_3d(sp_sview, sp_samp, addr, x0, y1, z1);
1814 tx13 = get_texel_3d(sp_sview, sp_samp, addr, x1, y1, z1);
1815
1816 /* interpolate R, G, B, A */
1817 for (c = 0; c < TGSI_QUAD_SIZE; c++)
1818 rgba[TGSI_NUM_CHANNELS*c] = lerp_3d(xw, yw, zw,
1819 tx00[c], tx01[c],
1820 tx02[c], tx03[c],
1821 tx10[c], tx11[c],
1822 tx12[c], tx13[c]);
1823 }
1824
1825
1826 /* Calculate level of detail for every fragment,
1827 * with lambda already computed.
1828 * Note that lambda has already been biased by global LOD bias.
1829 * \param biased_lambda per-quad lambda.
1830 * \param lod_in per-fragment lod_bias or explicit_lod.
1831 * \param lod returns the per-fragment lod.
1832 */
1833 static INLINE void
1834 compute_lod(const struct pipe_sampler_state *sampler,
1835 enum tgsi_sampler_control control,
1836 const float biased_lambda,
1837 const float lod_in[TGSI_QUAD_SIZE],
1838 float lod[TGSI_QUAD_SIZE])
1839 {
1840 float min_lod = sampler->min_lod;
1841 float max_lod = sampler->max_lod;
1842 uint i;
1843
1844 switch (control) {
1845 case tgsi_sampler_lod_none:
1846 case tgsi_sampler_lod_zero:
1847 /* XXX FIXME */
1848 case tgsi_sampler_derivs_explicit:
1849 lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(biased_lambda, min_lod, max_lod);
1850 break;
1851 case tgsi_sampler_lod_bias:
1852 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1853 lod[i] = biased_lambda + lod_in[i];
1854 lod[i] = CLAMP(lod[i], min_lod, max_lod);
1855 }
1856 break;
1857 case tgsi_sampler_lod_explicit:
1858 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1859 lod[i] = CLAMP(lod_in[i], min_lod, max_lod);
1860 }
1861 break;
1862 default:
1863 assert(0);
1864 lod[0] = lod[1] = lod[2] = lod[3] = 0.0f;
1865 }
1866 }
1867
1868
1869 /* Calculate level of detail for every fragment.
1870 * \param lod_in per-fragment lod_bias or explicit_lod.
1871 * \param lod results per-fragment lod.
1872 */
1873 static INLINE void
1874 compute_lambda_lod(struct sp_sampler_view *sp_sview,
1875 struct sp_sampler *sp_samp,
1876 const float s[TGSI_QUAD_SIZE],
1877 const float t[TGSI_QUAD_SIZE],
1878 const float p[TGSI_QUAD_SIZE],
1879 const float lod_in[TGSI_QUAD_SIZE],
1880 enum tgsi_sampler_control control,
1881 float lod[TGSI_QUAD_SIZE])
1882 {
1883 const struct pipe_sampler_state *sampler = &sp_samp->base;
1884 float lod_bias = sampler->lod_bias;
1885 float min_lod = sampler->min_lod;
1886 float max_lod = sampler->max_lod;
1887 float lambda;
1888 uint i;
1889
1890 switch (control) {
1891 case tgsi_sampler_lod_none:
1892 case tgsi_sampler_gather:
1893 /* XXX FIXME */
1894 case tgsi_sampler_derivs_explicit:
1895 lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias;
1896 lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lambda, min_lod, max_lod);
1897 break;
1898 case tgsi_sampler_lod_bias:
1899 lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias;
1900 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1901 lod[i] = lambda + lod_in[i];
1902 lod[i] = CLAMP(lod[i], min_lod, max_lod);
1903 }
1904 break;
1905 case tgsi_sampler_lod_explicit:
1906 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1907 lod[i] = CLAMP(lod_in[i], min_lod, max_lod);
1908 }
1909 break;
1910 case tgsi_sampler_lod_zero:
1911 /* this is all static state in the sampler really need clamp here? */
1912 lod[0] = lod[1] = lod[2] = lod[3] = CLAMP(lod_bias, min_lod, max_lod);
1913 break;
1914 default:
1915 assert(0);
1916 lod[0] = lod[1] = lod[2] = lod[3] = 0.0f;
1917 }
1918 }
1919
1920 static INLINE unsigned
1921 get_gather_component(const float lod_in[TGSI_QUAD_SIZE])
1922 {
1923 /* gather component is stored in lod_in slot as unsigned */
1924 return (*(unsigned int *)lod_in) & 0x3;
1925 }
1926
1927 static void
1928 mip_filter_linear(struct sp_sampler_view *sp_sview,
1929 struct sp_sampler *sp_samp,
1930 img_filter_func min_filter,
1931 img_filter_func mag_filter,
1932 const float s[TGSI_QUAD_SIZE],
1933 const float t[TGSI_QUAD_SIZE],
1934 const float p[TGSI_QUAD_SIZE],
1935 const float c0[TGSI_QUAD_SIZE],
1936 const float lod_in[TGSI_QUAD_SIZE],
1937 const struct filter_args *filt_args,
1938 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
1939 {
1940 const struct pipe_sampler_view *psview = &sp_sview->base;
1941 int j;
1942 float lod[TGSI_QUAD_SIZE];
1943 struct img_filter_args args;
1944
1945 compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
1946
1947 args.offset = filt_args->offset;
1948 args.gather_only = filt_args->control == tgsi_sampler_gather;
1949 args.gather_comp = get_gather_component(lod_in);
1950
1951 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
1952 int level0 = psview->u.tex.first_level + (int)lod[j];
1953
1954 args.s = s[j];
1955 args.t = t[j];
1956 args.p = p[j];
1957 args.face_id = sp_sview->faces[j];
1958
1959 if (lod[j] < 0.0) {
1960 args.level = psview->u.tex.first_level;
1961 mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
1962 }
1963 else if (level0 >= (int) psview->u.tex.last_level) {
1964 args.level = psview->u.tex.last_level;
1965 min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
1966 }
1967 else {
1968 float levelBlend = frac(lod[j]);
1969 float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
1970 int c;
1971
1972 args.level = level0;
1973 min_filter(sp_sview, sp_samp, &args, &rgbax[0][0]);
1974 args.level = level0+1;
1975 min_filter(sp_sview, sp_samp, &args, &rgbax[0][1]);
1976
1977 for (c = 0; c < 4; c++) {
1978 rgba[c][j] = lerp(levelBlend, rgbax[c][0], rgbax[c][1]);
1979 }
1980 }
1981 }
1982
1983 if (DEBUG_TEX) {
1984 print_sample_4(__FUNCTION__, rgba);
1985 }
1986 }
1987
1988
1989 /**
1990 * Compute nearest mipmap level from texcoords.
1991 * Then sample the texture level for four elements of a quad.
1992 * \param c0 the LOD bias factors, or absolute LODs (depending on control)
1993 */
1994 static void
1995 mip_filter_nearest(struct sp_sampler_view *sp_sview,
1996 struct sp_sampler *sp_samp,
1997 img_filter_func min_filter,
1998 img_filter_func mag_filter,
1999 const float s[TGSI_QUAD_SIZE],
2000 const float t[TGSI_QUAD_SIZE],
2001 const float p[TGSI_QUAD_SIZE],
2002 const float c0[TGSI_QUAD_SIZE],
2003 const float lod_in[TGSI_QUAD_SIZE],
2004 const struct filter_args *filt_args,
2005 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2006 {
2007 const struct pipe_sampler_view *psview = &sp_sview->base;
2008 float lod[TGSI_QUAD_SIZE];
2009 int j;
2010 struct img_filter_args args;
2011
2012 args.offset = filt_args->offset;
2013 args.gather_only = filt_args->control == tgsi_sampler_gather;
2014 args.gather_comp = get_gather_component(lod_in);
2015
2016 compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
2017
2018 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2019 args.s = s[j];
2020 args.t = t[j];
2021 args.p = p[j];
2022 args.face_id = sp_sview->faces[j];
2023
2024 if (lod[j] < 0.0) {
2025 args.level = psview->u.tex.first_level;
2026 mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
2027 } else {
2028 int level = psview->u.tex.first_level + (int)(lod[j] + 0.5F);
2029 args.level = MIN2(level, (int)psview->u.tex.last_level);
2030 min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
2031 }
2032 }
2033
2034 if (DEBUG_TEX) {
2035 print_sample_4(__FUNCTION__, rgba);
2036 }
2037 }
2038
2039
2040 static void
2041 mip_filter_none(struct sp_sampler_view *sp_sview,
2042 struct sp_sampler *sp_samp,
2043 img_filter_func min_filter,
2044 img_filter_func mag_filter,
2045 const float s[TGSI_QUAD_SIZE],
2046 const float t[TGSI_QUAD_SIZE],
2047 const float p[TGSI_QUAD_SIZE],
2048 const float c0[TGSI_QUAD_SIZE],
2049 const float lod_in[TGSI_QUAD_SIZE],
2050 const struct filter_args *filt_args,
2051 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2052 {
2053 float lod[TGSI_QUAD_SIZE];
2054 int j;
2055 struct img_filter_args args;
2056
2057 args.level = sp_sview->base.u.tex.first_level;
2058 args.offset = filt_args->offset;
2059 args.gather_only = filt_args->control == tgsi_sampler_gather;
2060
2061 compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
2062
2063 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2064 args.s = s[j];
2065 args.t = t[j];
2066 args.p = p[j];
2067 args.face_id = sp_sview->faces[j];
2068 if (lod[j] < 0.0) {
2069 mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
2070 }
2071 else {
2072 min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
2073 }
2074 }
2075 }
2076
2077
2078 static void
2079 mip_filter_none_no_filter_select(struct sp_sampler_view *sp_sview,
2080 struct sp_sampler *sp_samp,
2081 img_filter_func min_filter,
2082 img_filter_func mag_filter,
2083 const float s[TGSI_QUAD_SIZE],
2084 const float t[TGSI_QUAD_SIZE],
2085 const float p[TGSI_QUAD_SIZE],
2086 const float c0[TGSI_QUAD_SIZE],
2087 const float lod_in[TGSI_QUAD_SIZE],
2088 const struct filter_args *filt_args,
2089 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2090 {
2091 int j;
2092 struct img_filter_args args;
2093 args.level = sp_sview->base.u.tex.first_level;
2094 args.offset = filt_args->offset;
2095 args.gather_only = filt_args->control == tgsi_sampler_gather;
2096 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2097 args.s = s[j];
2098 args.t = t[j];
2099 args.p = p[j];
2100 args.face_id = sp_sview->faces[j];
2101 mag_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
2102 }
2103 }
2104
2105
/* For anisotropic filtering */
#define WEIGHT_LUT_SIZE 1024

/* Lazily allocated EWA weight table; NULL until create_filter_table()
 * has successfully built it.
 */
static float *weightLut = NULL;

/**
 * Creates the look-up table used to speed-up EWA sampling.
 *
 * Entry i holds exp(-alpha * r2) with alpha = 2 and
 * r2 = i / (WEIGHT_LUT_SIZE - 1), i.e. r2 runs over [0, 1].
 *
 * The table is built once; later calls return immediately.  If the
 * allocation fails the table pointer is left NULL (so a later call can
 * retry) instead of writing through a NULL pointer.
 */
static void
create_filter_table(void)
{
   const float alpha = 2;
   float *lut;
   unsigned i;

   if (weightLut)
      return;

   lut = (float *) MALLOC(WEIGHT_LUT_SIZE * sizeof(float));
   if (!lut)
      return;

   for (i = 0; i < WEIGHT_LUT_SIZE; ++i) {
      const float r2 = (float) i / (float) (WEIGHT_LUT_SIZE - 1);
      lut[i] = (float) exp(-alpha * r2);
   }

   /* Publish only once the table is completely filled in. */
   weightLut = lut;
}
2129
2130
/**
 * Elliptical weighted average (EWA) filter for producing high quality
 * anisotropic filtered results.
 * Based on the Higher Quality Elliptical Weighted Average Filter
 * published by Paul S. Heckbert in his Master's Thesis
 * "Fundamentals of Texture Mapping and Image Warping" (1989)
 *
 * For each of the four pixels of the quad, the texels inside the
 * bounding box of the sampling ellipse are visited; those falling inside
 * the ellipse are fetched through min_filter and accumulated with a
 * weight taken from weightLut.  The weighted sum divided by the sum of
 * the weights is written to rgba.
 *
 * \param min_filter  image filter used to fetch individual texels
 * \param mag_filter  not used in this function
 * \param s, t        normalized texcoords of the four quad pixels
 * \param p           forwarded to min_filter as args.p
 * \param level       mipmap level to sample from
 * \param dudx, dvdx, dudy, dvdy  texel-space derivatives; they are
 *                    multiplied by 1 / (1 << level) before use
 * \param rgba        result, one column per quad pixel
 *
 * NOTE(review): weightLut is dereferenced without a NULL check, so
 * create_filter_table() must have succeeded beforehand - confirm callers.
 * NOTE(review): args.offset and args.gather_only are never assigned in
 * this function; whether min_filter reads them cannot be told from here.
 */
static void
img_filter_2d_ewa(struct sp_sampler_view *sp_sview,
                  struct sp_sampler *sp_samp,
                  img_filter_func min_filter,
                  img_filter_func mag_filter,
                  const float s[TGSI_QUAD_SIZE],
                  const float t[TGSI_QUAD_SIZE],
                  const float p[TGSI_QUAD_SIZE],
                  unsigned level,
                  const float dudx, const float dvdx,
                  const float dudy, const float dvdy,
                  float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
{
   const struct pipe_resource *texture = sp_sview->base.texture;

   /* ??? Won't the image filters blow up if level is negative?
    * Note that 'level' is unsigned here, so the test below can never
    * select 0 for a negative value; level0 always equals level.
    */
   unsigned level0 = level > 0 ? level : 0;
   float scaling = 1.0f / (1 << level0);
   int width = u_minify(texture->width0, level0);
   int height = u_minify(texture->height0, level0);
   struct img_filter_args args;
   /* derivatives rescaled to texels of the selected level */
   float ux = dudx * scaling;
   float vx = dvdx * scaling;
   float uy = dudy * scaling;
   float vy = dvdy * scaling;

   /* compute ellipse coefficients to bound the region:
    * A*x*x + B*x*y + C*y*y = F.
    */
   float A = vx*vx+vy*vy+1;
   float B = -2*(ux*vx+uy*vy);
   float C = ux*ux+uy*uy+1;
   float F = A*C-B*B/4.0f;

   /* check if it is an ellipse */
   /* assert(F > 0.0); */

   /* Compute the ellipse's (u,v) bounding box in texture space */
   float d = -B*B+4.0f*C*A;
   float box_u = 2.0f / d * sqrtf(d*C*F); /* box_u -> half of bbox width */
   float box_v = 2.0f / d * sqrtf(A*d*F); /* box_v -> half of bbox height */

   /* Pending texel fetches: coordinates and weights are queued here and
    * read in batches of up to TGSI_QUAD_SIZE (see below).
    */
   float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
   float s_buffer[TGSI_QUAD_SIZE];
   float t_buffer[TGSI_QUAD_SIZE];
   float weight_buffer[TGSI_QUAD_SIZE];
   unsigned buffer_next;   /* number of queued entries */
   int j;
   float den; /* = 0.0F; */ /* sum of the weights of the current pixel */
   float ddq;               /* second difference of q along u */
   float U; /* = u0 - tex_u; */
   int v;

   /* Scale ellipse formula to directly index the Filter Lookup Table.
    * i.e. scale so that F = WEIGHT_LUT_SIZE-1
    */
   double formScale = (double) (WEIGHT_LUT_SIZE - 1) / F;
   A *= formScale;
   B *= formScale;
   C *= formScale;
   /* F *= formScale; */ /* no need to scale F as we don't use it below here */

   /* For each quad, the du and dx values are the same and so the ellipse is
    * also the same. Note that texel/image access can only be performed using
    * a quad, i.e. it is not possible to get the pixel value for a single
    * tex coord. In order to have a better performance, the access is buffered
    * using the s_buffer/t_buffer and weight_buffer. Only when the buffer is
    * full, then the pixel values are read from the image.
    */
   ddq = 2 * A;

   args.level = level;
   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
      /* Heckbert MS thesis, p. 59; scan over the bounding box of the ellipse
       * and incrementally update the value of Ax^2+Bxy*Cy^2; when this
       * value, q, is less than F, we're inside the ellipse
       */
      float tex_u = -0.5F + s[j] * texture->width0 * scaling;
      float tex_v = -0.5F + t[j] * texture->height0 * scaling;

      /* integer texel bounds of the ellipse's bounding box */
      int u0 = (int) floorf(tex_u - box_u);
      int u1 = (int) ceilf(tex_u + box_u);
      int v0 = (int) floorf(tex_v - box_v);
      int v1 = (int) ceilf(tex_v + box_v);

      /* weighted color sum for this pixel */
      float num[4] = {0.0F, 0.0F, 0.0F, 0.0F};
      buffer_next = 0;
      den = 0;
      args.face_id = sp_sview->faces[j];

      U = u0 - tex_u;
      for (v = v0; v <= v1; ++v) {
         float V = v - tex_v;
         /* q is the ellipse form evaluated at (U, V), the start of the
          * row; dq is its first forward difference along u.
          */
         float dq = A * (2 * U + 1) + B * V;
         float q = (C * V + B * U) * V + A * U * U;

         int u;
         for (u = u0; u <= u1; ++u) {
            /* Note that the ellipse has been pre-scaled so F =
             * WEIGHT_LUT_SIZE - 1
             */
            if (q < WEIGHT_LUT_SIZE) {
               /* as a LUT is used, q must never be negative;
                * should not happen, though
                */
               const int qClamped = q >= 0.0F ? q : 0;
               float weight = weightLut[qClamped];

               /* queue this texel; coordinates are converted back to
                * normalized texcoords for the image filter
                */
               weight_buffer[buffer_next] = weight;
               s_buffer[buffer_next] = u / ((float) width);
               t_buffer[buffer_next] = v / ((float) height);

               buffer_next++;
               if (buffer_next == TGSI_QUAD_SIZE) {
                  /* 4 texel coords are in the buffer -> read it now */
                  unsigned jj;
                  /* it is assumed that samp->min_img_filter is set to
                   * img_filter_2d_nearest or one of the
                   * accelerated img_filter_2d_nearest_XXX functions.
                   */
                  for (jj = 0; jj < buffer_next; jj++) {
                     args.s = s_buffer[jj];
                     args.t = t_buffer[jj];
                     /* NOTE(review): p is indexed by the buffer slot jj,
                      * not by the quad pixel j - confirm this is intended.
                      */
                     args.p = p[jj];
                     min_filter(sp_sview, sp_samp, &args, &rgba_temp[0][jj]);
                     num[0] += weight_buffer[jj] * rgba_temp[0][jj];
                     num[1] += weight_buffer[jj] * rgba_temp[1][jj];
                     num[2] += weight_buffer[jj] * rgba_temp[2][jj];
                     num[3] += weight_buffer[jj] * rgba_temp[3][jj];
                  }

                  buffer_next = 0;
               }

               den += weight;
            }
            /* step the quadratic form one texel along u */
            q += dq;
            dq += ddq;
         }
      }

      /* if the tex coord buffer contains unread values, we will read
       * them now.
       */
      if (buffer_next > 0) {
         unsigned jj;
         /* it is assumed that samp->min_img_filter is set to
          * img_filter_2d_nearest or one of the
          * accelerated img_filter_2d_nearest_XXX functions.
          */
         for (jj = 0; jj < buffer_next; jj++) {
            args.s = s_buffer[jj];
            args.t = t_buffer[jj];
            args.p = p[jj];
            min_filter(sp_sview, sp_samp, &args, &rgba_temp[0][jj]);
            num[0] += weight_buffer[jj] * rgba_temp[0][jj];
            num[1] += weight_buffer[jj] * rgba_temp[1][jj];
            num[2] += weight_buffer[jj] * rgba_temp[2][jj];
            num[3] += weight_buffer[jj] * rgba_temp[3][jj];
         }
      }

      if (den <= 0.0F) {
         /* Reaching this place would mean that no pixels intersected
          * the ellipse.  This should never happen because the filter
          * we use always intersects at least one pixel.
          */

         /*rgba[0]=0;
         rgba[1]=0;
         rgba[2]=0;
         rgba[3]=0;*/
         /* not enough pixels in resampling, resort to direct interpolation */
         args.s = s[j];
         args.t = t[j];
         args.p = p[j];
         min_filter(sp_sview, sp_samp, &args, &rgba_temp[0][j]);
         den = 1;
         num[0] = rgba_temp[0][j];
         num[1] = rgba_temp[1][j];
         num[2] = rgba_temp[2][j];
         num[3] = rgba_temp[3][j];
      }

      /* normalize the weighted sum */
      rgba[0][j] = num[0] / den;
      rgba[1][j] = num[1] / den;
      rgba[2][j] = num[2] / den;
      rgba[3][j] = num[3] / den;
   }
}
2328
2329
2330 /**
2331 * Sample 2D texture using an anisotropic filter.
2332 */
2333 static void
2334 mip_filter_linear_aniso(struct sp_sampler_view *sp_sview,
2335 struct sp_sampler *sp_samp,
2336 img_filter_func min_filter,
2337 img_filter_func mag_filter,
2338 const float s[TGSI_QUAD_SIZE],
2339 const float t[TGSI_QUAD_SIZE],
2340 const float p[TGSI_QUAD_SIZE],
2341 const float c0[TGSI_QUAD_SIZE],
2342 const float lod_in[TGSI_QUAD_SIZE],
2343 const struct filter_args *filt_args,
2344 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2345 {
2346 const struct pipe_resource *texture = sp_sview->base.texture;
2347 const struct pipe_sampler_view *psview = &sp_sview->base;
2348 int level0;
2349 float lambda;
2350 float lod[TGSI_QUAD_SIZE];
2351
2352 float s_to_u = u_minify(texture->width0, psview->u.tex.first_level);
2353 float t_to_v = u_minify(texture->height0, psview->u.tex.first_level);
2354 float dudx = (s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]) * s_to_u;
2355 float dudy = (s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]) * s_to_u;
2356 float dvdx = (t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]) * t_to_v;
2357 float dvdy = (t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT]) * t_to_v;
2358 struct img_filter_args args;
2359
2360 if (filt_args->control == tgsi_sampler_lod_bias ||
2361 filt_args->control == tgsi_sampler_lod_none ||
2362 /* XXX FIXME */
2363 filt_args->control == tgsi_sampler_derivs_explicit) {
2364 /* note: instead of working with Px and Py, we will use the
2365 * squared length instead, to avoid sqrt.
2366 */
2367 float Px2 = dudx * dudx + dvdx * dvdx;
2368 float Py2 = dudy * dudy + dvdy * dvdy;
2369
2370 float Pmax2;
2371 float Pmin2;
2372 float e;
2373 const float maxEccentricity = sp_samp->base.max_anisotropy * sp_samp->base.max_anisotropy;
2374
2375 if (Px2 < Py2) {
2376 Pmax2 = Py2;
2377 Pmin2 = Px2;
2378 }
2379 else {
2380 Pmax2 = Px2;
2381 Pmin2 = Py2;
2382 }
2383
2384 /* if the eccentricity of the ellipse is too big, scale up the shorter
2385 * of the two vectors to limit the maximum amount of work per pixel
2386 */
2387 e = Pmax2 / Pmin2;
2388 if (e > maxEccentricity) {
2389 /* float s=e / maxEccentricity;
2390 minor[0] *= s;
2391 minor[1] *= s;
2392 Pmin2 *= s; */
2393 Pmin2 = Pmax2 / maxEccentricity;
2394 }
2395
2396 /* note: we need to have Pmin=sqrt(Pmin2) here, but we can avoid
2397 * this since 0.5*log(x) = log(sqrt(x))
2398 */
2399 lambda = 0.5F * util_fast_log2(Pmin2) + sp_samp->base.lod_bias;
2400 compute_lod(&sp_samp->base, filt_args->control, lambda, lod_in, lod);
2401 }
2402 else {
2403 assert(filt_args->control == tgsi_sampler_lod_explicit ||
2404 filt_args->control == tgsi_sampler_lod_zero);
2405 compute_lod(&sp_samp->base, filt_args->control, sp_samp->base.lod_bias, lod_in, lod);
2406 }
2407
2408 /* XXX: Take into account all lod values.
2409 */
2410 lambda = lod[0];
2411 level0 = psview->u.tex.first_level + (int)lambda;
2412
2413 /* If the ellipse covers the whole image, we can
2414 * simply return the average of the whole image.
2415 */
2416 if (level0 >= (int) psview->u.tex.last_level) {
2417 int j;
2418 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2419 args.s = s[j];
2420 args.t = t[j];
2421 args.p = p[j];
2422 args.level = psview->u.tex.last_level;
2423 args.face_id = sp_sview->faces[j];
2424 min_filter(sp_sview, sp_samp, &args, &rgba[0][j]);
2425 }
2426 }
2427 else {
2428 /* don't bother interpolating between multiple LODs; it doesn't
2429 * seem to be worth the extra running time.
2430 */
2431 img_filter_2d_ewa(sp_sview, sp_samp, min_filter, mag_filter,
2432 s, t, p, level0,
2433 dudx, dvdx, dudy, dvdy, rgba);
2434 }
2435
2436 if (DEBUG_TEX) {
2437 print_sample_4(__FUNCTION__, rgba);
2438 }
2439 }
2440
2441
2442 /**
2443 * Specialized version of mip_filter_linear with hard-wired calls to
2444 * 2d lambda calculation and 2d_linear_repeat_POT img filters.
2445 */
2446 static void
2447 mip_filter_linear_2d_linear_repeat_POT(
2448 struct sp_sampler_view *sp_sview,
2449 struct sp_sampler *sp_samp,
2450 img_filter_func min_filter,
2451 img_filter_func mag_filter,
2452 const float s[TGSI_QUAD_SIZE],
2453 const float t[TGSI_QUAD_SIZE],
2454 const float p[TGSI_QUAD_SIZE],
2455 const float c0[TGSI_QUAD_SIZE],
2456 const float lod_in[TGSI_QUAD_SIZE],
2457 const struct filter_args *filt_args,
2458 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2459 {
2460 const struct pipe_sampler_view *psview = &sp_sview->base;
2461 int j;
2462 float lod[TGSI_QUAD_SIZE];
2463
2464 compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
2465
2466 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2467 int level0 = psview->u.tex.first_level + (int)lod[j];
2468 struct img_filter_args args;
2469 /* Catches both negative and large values of level0:
2470 */
2471 args.s = s[j];
2472 args.t = t[j];
2473 args.p = p[j];
2474 args.face_id = sp_sview->faces[j];
2475 args.gather_only = filt_args->control == tgsi_sampler_gather;
2476 if ((unsigned)level0 >= psview->u.tex.last_level) {
2477 if (level0 < 0)
2478 args.level = psview->u.tex.first_level;
2479 else
2480 args.level = psview->u.tex.last_level;
2481 img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, &args,
2482 &rgba[0][j]);
2483
2484 }
2485 else {
2486 float levelBlend = frac(lod[j]);
2487 float rgbax[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2488 int c;
2489
2490 args.level = level0;
2491 img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, &args, &rgbax[0][0]);
2492 args.level = level0+1;
2493 img_filter_2d_linear_repeat_POT(sp_sview, sp_samp, &args, &rgbax[0][1]);
2494
2495 for (c = 0; c < TGSI_NUM_CHANNELS; c++)
2496 rgba[c][j] = lerp(levelBlend, rgbax[c][0], rgbax[c][1]);
2497 }
2498 }
2499
2500 if (DEBUG_TEX) {
2501 print_sample_4(__FUNCTION__, rgba);
2502 }
2503 }
2504
2505
2506 /**
2507 * Do shadow/depth comparisons.
2508 */
2509 static void
2510 sample_compare(struct sp_sampler_view *sp_sview,
2511 struct sp_sampler *sp_samp,
2512 const float s[TGSI_QUAD_SIZE],
2513 const float t[TGSI_QUAD_SIZE],
2514 const float p[TGSI_QUAD_SIZE],
2515 const float c0[TGSI_QUAD_SIZE],
2516 const float c1[TGSI_QUAD_SIZE],
2517 enum tgsi_sampler_control control,
2518 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2519 {
2520 const struct pipe_sampler_state *sampler = &sp_samp->base;
2521 int j, v;
2522 int k[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2523 float pc[4];
2524 const struct util_format_description *format_desc;
2525 unsigned chan_type;
2526 bool is_gather = (control == tgsi_sampler_gather);
2527
2528 /**
2529 * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
2530 * for 2D Array texture we need to use the 'c0' (aka Q).
2531 * When we sampled the depth texture, the depth value was put into all
2532 * RGBA channels. We look at the red channel here.
2533 */
2534
2535 if (sp_sview->base.target == PIPE_TEXTURE_2D_ARRAY ||
2536 sp_sview->base.target == PIPE_TEXTURE_CUBE) {
2537 pc[0] = c0[0];
2538 pc[1] = c0[1];
2539 pc[2] = c0[2];
2540 pc[3] = c0[3];
2541 } else if (sp_sview->base.target == PIPE_TEXTURE_CUBE_ARRAY) {
2542 pc[0] = c1[0];
2543 pc[1] = c1[1];
2544 pc[2] = c1[2];
2545 pc[3] = c1[3];
2546 } else {
2547 pc[0] = p[0];
2548 pc[1] = p[1];
2549 pc[2] = p[2];
2550 pc[3] = p[3];
2551 }
2552
2553 format_desc = util_format_description(sp_sview->base.format);
2554 /* not entirely sure we couldn't end up with non-valid swizzle here */
2555 chan_type = format_desc->swizzle[0] <= UTIL_FORMAT_SWIZZLE_W ?
2556 format_desc->channel[format_desc->swizzle[0]].type :
2557 UTIL_FORMAT_TYPE_FLOAT;
2558 if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
2559 /*
2560 * clamping is a result of conversion to texture format, hence
2561 * doesn't happen with floats. Technically also should do comparison
2562 * in texture format (quantization!).
2563 */
2564 pc[0] = CLAMP(pc[0], 0.0F, 1.0F);
2565 pc[1] = CLAMP(pc[1], 0.0F, 1.0F);
2566 pc[2] = CLAMP(pc[2], 0.0F, 1.0F);
2567 pc[3] = CLAMP(pc[3], 0.0F, 1.0F);
2568 }
2569
2570 for (v = 0; v < (is_gather ? TGSI_NUM_CHANNELS : 1); v++) {
2571 /* compare four texcoords vs. four texture samples */
2572 switch (sampler->compare_func) {
2573 case PIPE_FUNC_LESS:
2574 k[v][0] = pc[0] < rgba[v][0];
2575 k[v][1] = pc[1] < rgba[v][1];
2576 k[v][2] = pc[2] < rgba[v][2];
2577 k[v][3] = pc[3] < rgba[v][3];
2578 break;
2579 case PIPE_FUNC_LEQUAL:
2580 k[v][0] = pc[0] <= rgba[v][0];
2581 k[v][1] = pc[1] <= rgba[v][1];
2582 k[v][2] = pc[2] <= rgba[v][2];
2583 k[v][3] = pc[3] <= rgba[v][3];
2584 break;
2585 case PIPE_FUNC_GREATER:
2586 k[v][0] = pc[0] > rgba[v][0];
2587 k[v][1] = pc[1] > rgba[v][1];
2588 k[v][2] = pc[2] > rgba[v][2];
2589 k[v][3] = pc[3] > rgba[v][3];
2590 break;
2591 case PIPE_FUNC_GEQUAL:
2592 k[v][0] = pc[0] >= rgba[v][0];
2593 k[v][1] = pc[1] >= rgba[v][1];
2594 k[v][2] = pc[2] >= rgba[v][2];
2595 k[v][3] = pc[3] >= rgba[v][3];
2596 break;
2597 case PIPE_FUNC_EQUAL:
2598 k[v][0] = pc[0] == rgba[v][0];
2599 k[v][1] = pc[1] == rgba[v][1];
2600 k[v][2] = pc[2] == rgba[v][2];
2601 k[v][3] = pc[3] == rgba[v][3];
2602 break;
2603 case PIPE_FUNC_NOTEQUAL:
2604 k[v][0] = pc[0] != rgba[v][0];
2605 k[v][1] = pc[1] != rgba[v][1];
2606 k[v][2] = pc[2] != rgba[v][2];
2607 k[v][3] = pc[3] != rgba[v][3];
2608 break;
2609 case PIPE_FUNC_ALWAYS:
2610 k[v][0] = k[v][1] = k[v][2] = k[v][3] = 1;
2611 break;
2612 case PIPE_FUNC_NEVER:
2613 k[v][0] = k[v][1] = k[v][2] = k[v][3] = 0;
2614 break;
2615 default:
2616 k[v][0] = k[v][1] = k[v][2] = k[v][3] = 0;
2617 assert(0);
2618 break;
2619 }
2620 }
2621
2622 if (is_gather) {
2623 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2624 for (v = 0; v < TGSI_NUM_CHANNELS; v++) {
2625 rgba[v][j] = k[v][j];
2626 }
2627 }
2628 } else {
2629 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2630 rgba[0][j] = k[0][j];
2631 rgba[1][j] = k[0][j];
2632 rgba[2][j] = k[0][j];
2633 rgba[3][j] = 1.0F;
2634 }
2635 }
2636 }
2637
2638 static void
2639 do_swizzling(const struct pipe_sampler_view *sview,
2640 float in[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
2641 float out[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2642 {
2643 int j;
2644 const unsigned swizzle_r = sview->swizzle_r;
2645 const unsigned swizzle_g = sview->swizzle_g;
2646 const unsigned swizzle_b = sview->swizzle_b;
2647 const unsigned swizzle_a = sview->swizzle_a;
2648
2649 switch (swizzle_r) {
2650 case PIPE_SWIZZLE_ZERO:
2651 for (j = 0; j < 4; j++)
2652 out[0][j] = 0.0f;
2653 break;
2654 case PIPE_SWIZZLE_ONE:
2655 for (j = 0; j < 4; j++)
2656 out[0][j] = 1.0f;
2657 break;
2658 default:
2659 assert(swizzle_r < 4);
2660 for (j = 0; j < 4; j++)
2661 out[0][j] = in[swizzle_r][j];
2662 }
2663
2664 switch (swizzle_g) {
2665 case PIPE_SWIZZLE_ZERO:
2666 for (j = 0; j < 4; j++)
2667 out[1][j] = 0.0f;
2668 break;
2669 case PIPE_SWIZZLE_ONE:
2670 for (j = 0; j < 4; j++)
2671 out[1][j] = 1.0f;
2672 break;
2673 default:
2674 assert(swizzle_g < 4);
2675 for (j = 0; j < 4; j++)
2676 out[1][j] = in[swizzle_g][j];
2677 }
2678
2679 switch (swizzle_b) {
2680 case PIPE_SWIZZLE_ZERO:
2681 for (j = 0; j < 4; j++)
2682 out[2][j] = 0.0f;
2683 break;
2684 case PIPE_SWIZZLE_ONE:
2685 for (j = 0; j < 4; j++)
2686 out[2][j] = 1.0f;
2687 break;
2688 default:
2689 assert(swizzle_b < 4);
2690 for (j = 0; j < 4; j++)
2691 out[2][j] = in[swizzle_b][j];
2692 }
2693
2694 switch (swizzle_a) {
2695 case PIPE_SWIZZLE_ZERO:
2696 for (j = 0; j < 4; j++)
2697 out[3][j] = 0.0f;
2698 break;
2699 case PIPE_SWIZZLE_ONE:
2700 for (j = 0; j < 4; j++)
2701 out[3][j] = 1.0f;
2702 break;
2703 default:
2704 assert(swizzle_a < 4);
2705 for (j = 0; j < 4; j++)
2706 out[3][j] = in[swizzle_a][j];
2707 }
2708 }
2709
2710
2711 static wrap_nearest_func
2712 get_nearest_unorm_wrap(unsigned mode)
2713 {
2714 switch (mode) {
2715 case PIPE_TEX_WRAP_CLAMP:
2716 return wrap_nearest_unorm_clamp;
2717 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2718 return wrap_nearest_unorm_clamp_to_edge;
2719 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2720 return wrap_nearest_unorm_clamp_to_border;
2721 default:
2722 debug_printf("illegal wrap mode %d with non-normalized coords\n", mode);
2723 return wrap_nearest_unorm_clamp;
2724 }
2725 }
2726
2727
2728 static wrap_nearest_func
2729 get_nearest_wrap(unsigned mode)
2730 {
2731 switch (mode) {
2732 case PIPE_TEX_WRAP_REPEAT:
2733 return wrap_nearest_repeat;
2734 case PIPE_TEX_WRAP_CLAMP:
2735 return wrap_nearest_clamp;
2736 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2737 return wrap_nearest_clamp_to_edge;
2738 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2739 return wrap_nearest_clamp_to_border;
2740 case PIPE_TEX_WRAP_MIRROR_REPEAT:
2741 return wrap_nearest_mirror_repeat;
2742 case PIPE_TEX_WRAP_MIRROR_CLAMP:
2743 return wrap_nearest_mirror_clamp;
2744 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
2745 return wrap_nearest_mirror_clamp_to_edge;
2746 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
2747 return wrap_nearest_mirror_clamp_to_border;
2748 default:
2749 assert(0);
2750 return wrap_nearest_repeat;
2751 }
2752 }
2753
2754
2755 static wrap_linear_func
2756 get_linear_unorm_wrap(unsigned mode)
2757 {
2758 switch (mode) {
2759 case PIPE_TEX_WRAP_CLAMP:
2760 return wrap_linear_unorm_clamp;
2761 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2762 return wrap_linear_unorm_clamp_to_edge;
2763 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2764 return wrap_linear_unorm_clamp_to_border;
2765 default:
2766 debug_printf("illegal wrap mode %d with non-normalized coords\n", mode);
2767 return wrap_linear_unorm_clamp;
2768 }
2769 }
2770
2771
2772 static wrap_linear_func
2773 get_linear_wrap(unsigned mode)
2774 {
2775 switch (mode) {
2776 case PIPE_TEX_WRAP_REPEAT:
2777 return wrap_linear_repeat;
2778 case PIPE_TEX_WRAP_CLAMP:
2779 return wrap_linear_clamp;
2780 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
2781 return wrap_linear_clamp_to_edge;
2782 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
2783 return wrap_linear_clamp_to_border;
2784 case PIPE_TEX_WRAP_MIRROR_REPEAT:
2785 return wrap_linear_mirror_repeat;
2786 case PIPE_TEX_WRAP_MIRROR_CLAMP:
2787 return wrap_linear_mirror_clamp;
2788 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
2789 return wrap_linear_mirror_clamp_to_edge;
2790 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
2791 return wrap_linear_mirror_clamp_to_border;
2792 default:
2793 assert(0);
2794 return wrap_linear_repeat;
2795 }
2796 }
2797
2798
2799 /**
2800 * Is swizzling needed for the given state key?
2801 */
2802 static INLINE bool
2803 any_swizzle(const struct pipe_sampler_view *view)
2804 {
2805 return (view->swizzle_r != PIPE_SWIZZLE_RED ||
2806 view->swizzle_g != PIPE_SWIZZLE_GREEN ||
2807 view->swizzle_b != PIPE_SWIZZLE_BLUE ||
2808 view->swizzle_a != PIPE_SWIZZLE_ALPHA);
2809 }
2810
2811
2812 static img_filter_func
2813 get_img_filter(const struct sp_sampler_view *sp_sview,
2814 const struct pipe_sampler_state *sampler,
2815 unsigned filter, bool gather)
2816 {
2817 switch (sp_sview->base.target) {
2818 case PIPE_BUFFER:
2819 case PIPE_TEXTURE_1D:
2820 if (filter == PIPE_TEX_FILTER_NEAREST)
2821 return img_filter_1d_nearest;
2822 else
2823 return img_filter_1d_linear;
2824 break;
2825 case PIPE_TEXTURE_1D_ARRAY:
2826 if (filter == PIPE_TEX_FILTER_NEAREST)
2827 return img_filter_1d_array_nearest;
2828 else
2829 return img_filter_1d_array_linear;
2830 break;
2831 case PIPE_TEXTURE_2D:
2832 case PIPE_TEXTURE_RECT:
2833 /* Try for fast path:
2834 */
2835 if (!gather && sp_sview->pot2d &&
2836 sampler->wrap_s == sampler->wrap_t &&
2837 sampler->normalized_coords)
2838 {
2839 switch (sampler->wrap_s) {
2840 case PIPE_TEX_WRAP_REPEAT:
2841 switch (filter) {
2842 case PIPE_TEX_FILTER_NEAREST:
2843 return img_filter_2d_nearest_repeat_POT;
2844 case PIPE_TEX_FILTER_LINEAR:
2845 return img_filter_2d_linear_repeat_POT;
2846 default:
2847 break;
2848 }
2849 break;
2850 case PIPE_TEX_WRAP_CLAMP:
2851 switch (filter) {
2852 case PIPE_TEX_FILTER_NEAREST:
2853 return img_filter_2d_nearest_clamp_POT;
2854 default:
2855 break;
2856 }
2857 }
2858 }
2859 /* Otherwise use default versions:
2860 */
2861 if (filter == PIPE_TEX_FILTER_NEAREST)
2862 return img_filter_2d_nearest;
2863 else
2864 return img_filter_2d_linear;
2865 break;
2866 case PIPE_TEXTURE_2D_ARRAY:
2867 if (filter == PIPE_TEX_FILTER_NEAREST)
2868 return img_filter_2d_array_nearest;
2869 else
2870 return img_filter_2d_array_linear;
2871 break;
2872 case PIPE_TEXTURE_CUBE:
2873 if (filter == PIPE_TEX_FILTER_NEAREST)
2874 return img_filter_cube_nearest;
2875 else
2876 return img_filter_cube_linear;
2877 break;
2878 case PIPE_TEXTURE_CUBE_ARRAY:
2879 if (filter == PIPE_TEX_FILTER_NEAREST)
2880 return img_filter_cube_array_nearest;
2881 else
2882 return img_filter_cube_array_linear;
2883 break;
2884 case PIPE_TEXTURE_3D:
2885 if (filter == PIPE_TEX_FILTER_NEAREST)
2886 return img_filter_3d_nearest;
2887 else
2888 return img_filter_3d_linear;
2889 break;
2890 default:
2891 assert(0);
2892 return img_filter_1d_nearest;
2893 }
2894 }
2895
2896
2897 static void
2898 sample_mip(struct sp_sampler_view *sp_sview,
2899 struct sp_sampler *sp_samp,
2900 const float s[TGSI_QUAD_SIZE],
2901 const float t[TGSI_QUAD_SIZE],
2902 const float p[TGSI_QUAD_SIZE],
2903 const float c0[TGSI_QUAD_SIZE],
2904 const float lod[TGSI_QUAD_SIZE],
2905 const struct filter_args *filt_args,
2906 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2907 {
2908 mip_filter_func mip_filter;
2909 img_filter_func min_img_filter = NULL;
2910 img_filter_func mag_img_filter = NULL;
2911
2912 if (filt_args->control == tgsi_sampler_gather) {
2913 mip_filter = mip_filter_nearest;
2914 min_img_filter = get_img_filter(sp_sview, &sp_samp->base, PIPE_TEX_FILTER_LINEAR, true);
2915 } else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) {
2916 mip_filter = mip_filter_linear_2d_linear_repeat_POT;
2917 }
2918 else {
2919 mip_filter = sp_samp->mip_filter;
2920 min_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->min_img_filter, false);
2921 if (sp_samp->min_mag_equal) {
2922 mag_img_filter = min_img_filter;
2923 }
2924 else {
2925 mag_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->base.mag_img_filter, false);
2926 }
2927 }
2928
2929 mip_filter(sp_sview, sp_samp, min_img_filter, mag_img_filter,
2930 s, t, p, c0, lod, filt_args, rgba);
2931
2932 if (sp_samp->base.compare_mode != PIPE_TEX_COMPARE_NONE) {
2933 sample_compare(sp_sview, sp_samp, s, t, p, c0, lod, filt_args->control, rgba);
2934 }
2935
2936 if (sp_sview->need_swizzle && filt_args->control != tgsi_sampler_gather) {
2937 float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2938 memcpy(rgba_temp, rgba, sizeof(rgba_temp));
2939 do_swizzling(&sp_sview->base, rgba_temp, rgba);
2940 }
2941
2942 }
2943
2944
2945 /**
2946 * Use 3D texcoords to choose a cube face, then sample the 2D cube faces.
2947 * Put face info into the sampler faces[] array.
2948 */
2949 static void
2950 sample_cube(struct sp_sampler_view *sp_sview,
2951 struct sp_sampler *sp_samp,
2952 const float s[TGSI_QUAD_SIZE],
2953 const float t[TGSI_QUAD_SIZE],
2954 const float p[TGSI_QUAD_SIZE],
2955 const float c0[TGSI_QUAD_SIZE],
2956 const float c1[TGSI_QUAD_SIZE],
2957 const struct filter_args *filt_args,
2958 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
2959 {
2960 unsigned j;
2961 float ssss[4], tttt[4];
2962
2963 /* Not actually used, but the intermediate steps that do the
2964 * dereferencing don't know it.
2965 */
2966 static float pppp[4] = { 0, 0, 0, 0 };
2967
2968 pppp[0] = c0[0];
2969 pppp[1] = c0[1];
2970 pppp[2] = c0[2];
2971 pppp[3] = c0[3];
2972 /*
2973 major axis
2974 direction target sc tc ma
2975 ---------- ------------------------------- --- --- ---
2976 +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx
2977 -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx
2978 +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry
2979 -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry
2980 +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz
2981 -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz
2982 */
2983
2984 /* Choose the cube face and compute new s/t coords for the 2D face.
2985 *
2986 * Use the same cube face for all four pixels in the quad.
2987 *
2988 * This isn't ideal, but if we want to use a different cube face
2989 * per pixel in the quad, we'd have to also compute the per-face
2990 * LOD here too. That's because the four post-face-selection
2991 * texcoords are no longer related to each other (they're
2992 * per-face!) so we can't use subtraction to compute the partial
2993 * deriviates to compute the LOD. Doing so (near cube edges
2994 * anyway) gives us pretty much random values.
2995 */
2996 {
2997 /* use the average of the four pixel's texcoords to choose the face */
2998 const float rx = 0.25F * (s[0] + s[1] + s[2] + s[3]);
2999 const float ry = 0.25F * (t[0] + t[1] + t[2] + t[3]);
3000 const float rz = 0.25F * (p[0] + p[1] + p[2] + p[3]);
3001 const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
3002
3003 if (arx >= ary && arx >= arz) {
3004 float sign = (rx >= 0.0F) ? 1.0F : -1.0F;
3005 uint face = (rx >= 0.0F) ? PIPE_TEX_FACE_POS_X : PIPE_TEX_FACE_NEG_X;
3006 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3007 const float ima = -0.5F / fabsf(s[j]);
3008 ssss[j] = sign * p[j] * ima + 0.5F;
3009 tttt[j] = t[j] * ima + 0.5F;
3010 sp_sview->faces[j] = face;
3011 }
3012 }
3013 else if (ary >= arx && ary >= arz) {
3014 float sign = (ry >= 0.0F) ? 1.0F : -1.0F;
3015 uint face = (ry >= 0.0F) ? PIPE_TEX_FACE_POS_Y : PIPE_TEX_FACE_NEG_Y;
3016 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3017 const float ima = -0.5F / fabsf(t[j]);
3018 ssss[j] = -s[j] * ima + 0.5F;
3019 tttt[j] = sign * -p[j] * ima + 0.5F;
3020 sp_sview->faces[j] = face;
3021 }
3022 }
3023 else {
3024 float sign = (rz >= 0.0F) ? 1.0F : -1.0F;
3025 uint face = (rz >= 0.0F) ? PIPE_TEX_FACE_POS_Z : PIPE_TEX_FACE_NEG_Z;
3026 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3027 const float ima = -0.5F / fabsf(p[j]);
3028 ssss[j] = sign * -s[j] * ima + 0.5F;
3029 tttt[j] = t[j] * ima + 0.5F;
3030 sp_sview->faces[j] = face;
3031 }
3032 }
3033 }
3034
3035 sample_mip(sp_sview, sp_samp, ssss, tttt, pppp, c0, c1, filt_args, rgba);
3036 }
3037
3038
3039 static void
3040 sp_get_dims(struct sp_sampler_view *sp_sview, int level,
3041 int dims[4])
3042 {
3043 const struct pipe_sampler_view *view = &sp_sview->base;
3044 const struct pipe_resource *texture = view->texture;
3045
3046 if (view->target == PIPE_BUFFER) {
3047 dims[0] = (view->u.buf.last_element - view->u.buf.first_element) + 1;
3048 /* the other values are undefined, but let's avoid potential valgrind
3049 * warnings.
3050 */
3051 dims[1] = dims[2] = dims[3] = 0;
3052 return;
3053 }
3054
3055 /* undefined according to EXT_gpu_program */
3056 level += view->u.tex.first_level;
3057 if (level > view->u.tex.last_level)
3058 return;
3059
3060 dims[3] = view->u.tex.last_level - view->u.tex.first_level + 1;
3061 dims[0] = u_minify(texture->width0, level);
3062
3063 switch (view->target) {
3064 case PIPE_TEXTURE_1D_ARRAY:
3065 dims[1] = view->u.tex.last_layer - view->u.tex.first_layer + 1;
3066 /* fallthrough */
3067 case PIPE_TEXTURE_1D:
3068 return;
3069 case PIPE_TEXTURE_2D_ARRAY:
3070 dims[2] = view->u.tex.last_layer - view->u.tex.first_layer + 1;
3071 /* fallthrough */
3072 case PIPE_TEXTURE_2D:
3073 case PIPE_TEXTURE_CUBE:
3074 case PIPE_TEXTURE_RECT:
3075 dims[1] = u_minify(texture->height0, level);
3076 return;
3077 case PIPE_TEXTURE_3D:
3078 dims[1] = u_minify(texture->height0, level);
3079 dims[2] = u_minify(texture->depth0, level);
3080 return;
3081 case PIPE_TEXTURE_CUBE_ARRAY:
3082 dims[1] = u_minify(texture->height0, level);
3083 dims[2] = (view->u.tex.last_layer - view->u.tex.first_layer + 1) / 6;
3084 break;
3085 default:
3086 assert(!"unexpected texture target in sp_get_dims()");
3087 return;
3088 }
3089 }
3090
3091 /**
3092 * This function is only used for getting unfiltered texels via the
3093 * TXF opcode. The GL spec says that out-of-bounds texel fetches
3094 * produce undefined results. Instead of crashing, lets just clamp
3095 * coords to the texture image size.
3096 */
3097 static void
3098 sp_get_texels(struct sp_sampler_view *sp_sview,
3099 const int v_i[TGSI_QUAD_SIZE],
3100 const int v_j[TGSI_QUAD_SIZE],
3101 const int v_k[TGSI_QUAD_SIZE],
3102 const int lod[TGSI_QUAD_SIZE],
3103 const int8_t offset[3],
3104 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
3105 {
3106 union tex_tile_address addr;
3107 const struct pipe_resource *texture = sp_sview->base.texture;
3108 int j, c;
3109 const float *tx;
3110 int width, height, depth;
3111
3112 addr.value = 0;
3113 /* TODO write a better test for LOD */
3114 addr.bits.level = sp_sview->base.target == PIPE_BUFFER ? 0 :
3115 CLAMP(lod[0] + sp_sview->base.u.tex.first_level,
3116 sp_sview->base.u.tex.first_level,
3117 sp_sview->base.u.tex.last_level);
3118
3119 width = u_minify(texture->width0, addr.bits.level);
3120 height = u_minify(texture->height0, addr.bits.level);
3121 depth = u_minify(texture->depth0, addr.bits.level);
3122
3123 switch (sp_sview->base.target) {
3124 case PIPE_BUFFER:
3125 case PIPE_TEXTURE_1D:
3126 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3127 int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
3128 tx = get_texel_2d_no_border(sp_sview, addr, x, 0);
3129 for (c = 0; c < 4; c++) {
3130 rgba[c][j] = tx[c];
3131 }
3132 }
3133 break;
3134 case PIPE_TEXTURE_1D_ARRAY:
3135 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3136 int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
3137 int y = CLAMP(v_j[j], sp_sview->base.u.tex.first_layer,
3138 sp_sview->base.u.tex.last_layer);
3139 tx = get_texel_2d_no_border(sp_sview, addr, x, y);
3140 for (c = 0; c < 4; c++) {
3141 rgba[c][j] = tx[c];
3142 }
3143 }
3144 break;
3145 case PIPE_TEXTURE_2D:
3146 case PIPE_TEXTURE_RECT:
3147 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3148 int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
3149 int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
3150 tx = get_texel_2d_no_border(sp_sview, addr, x, y);
3151 for (c = 0; c < 4; c++) {
3152 rgba[c][j] = tx[c];
3153 }
3154 }
3155 break;
3156 case PIPE_TEXTURE_2D_ARRAY:
3157 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3158 int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
3159 int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
3160 int layer = CLAMP(v_k[j], sp_sview->base.u.tex.first_layer,
3161 sp_sview->base.u.tex.last_layer);
3162 tx = get_texel_3d_no_border(sp_sview, addr, x, y, layer);
3163 for (c = 0; c < 4; c++) {
3164 rgba[c][j] = tx[c];
3165 }
3166 }
3167 break;
3168 case PIPE_TEXTURE_3D:
3169 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3170 int x = CLAMP(v_i[j] + offset[0], 0, width - 1);
3171 int y = CLAMP(v_j[j] + offset[1], 0, height - 1);
3172 int z = CLAMP(v_k[j] + offset[2], 0, depth - 1);
3173 tx = get_texel_3d_no_border(sp_sview, addr, x, y, z);
3174 for (c = 0; c < 4; c++) {
3175 rgba[c][j] = tx[c];
3176 }
3177 }
3178 break;
3179 case PIPE_TEXTURE_CUBE: /* TXF can't work on CUBE according to spec */
3180 default:
3181 assert(!"Unknown or CUBE texture type in TXF processing\n");
3182 break;
3183 }
3184
3185 if (sp_sview->need_swizzle) {
3186 float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
3187 memcpy(rgba_temp, rgba, sizeof(rgba_temp));
3188 do_swizzling(&sp_sview->base, rgba_temp, rgba);
3189 }
3190 }
3191
3192
3193 void *
3194 softpipe_create_sampler_state(struct pipe_context *pipe,
3195 const struct pipe_sampler_state *sampler)
3196 {
3197 struct sp_sampler *samp = CALLOC_STRUCT(sp_sampler);
3198
3199 samp->base = *sampler;
3200
3201 /* Note that (for instance) linear_texcoord_s and
3202 * nearest_texcoord_s may be active at the same time, if the
3203 * sampler min_img_filter differs from its mag_img_filter.
3204 */
3205 if (sampler->normalized_coords) {
3206 samp->linear_texcoord_s = get_linear_wrap( sampler->wrap_s );
3207 samp->linear_texcoord_t = get_linear_wrap( sampler->wrap_t );
3208 samp->linear_texcoord_p = get_linear_wrap( sampler->wrap_r );
3209
3210 samp->nearest_texcoord_s = get_nearest_wrap( sampler->wrap_s );
3211 samp->nearest_texcoord_t = get_nearest_wrap( sampler->wrap_t );
3212 samp->nearest_texcoord_p = get_nearest_wrap( sampler->wrap_r );
3213 }
3214 else {
3215 samp->linear_texcoord_s = get_linear_unorm_wrap( sampler->wrap_s );
3216 samp->linear_texcoord_t = get_linear_unorm_wrap( sampler->wrap_t );
3217 samp->linear_texcoord_p = get_linear_unorm_wrap( sampler->wrap_r );
3218
3219 samp->nearest_texcoord_s = get_nearest_unorm_wrap( sampler->wrap_s );
3220 samp->nearest_texcoord_t = get_nearest_unorm_wrap( sampler->wrap_t );
3221 samp->nearest_texcoord_p = get_nearest_unorm_wrap( sampler->wrap_r );
3222 }
3223
3224 samp->min_img_filter = sampler->min_img_filter;
3225
3226 switch (sampler->min_mip_filter) {
3227 case PIPE_TEX_MIPFILTER_NONE:
3228 if (sampler->min_img_filter == sampler->mag_img_filter)
3229 samp->mip_filter = mip_filter_none_no_filter_select;
3230 else
3231 samp->mip_filter = mip_filter_none;
3232 break;
3233
3234 case PIPE_TEX_MIPFILTER_NEAREST:
3235 samp->mip_filter = mip_filter_nearest;
3236 break;
3237
3238 case PIPE_TEX_MIPFILTER_LINEAR:
3239 if (sampler->min_img_filter == sampler->mag_img_filter &&
3240 sampler->normalized_coords &&
3241 sampler->wrap_s == PIPE_TEX_WRAP_REPEAT &&
3242 sampler->wrap_t == PIPE_TEX_WRAP_REPEAT &&
3243 sampler->min_img_filter == PIPE_TEX_FILTER_LINEAR &&
3244 sampler->max_anisotropy <= 1) {
3245 samp->min_mag_equal_repeat_linear = TRUE;
3246 }
3247 samp->mip_filter = mip_filter_linear;
3248
3249 /* Anisotropic filtering extension. */
3250 if (sampler->max_anisotropy > 1) {
3251 samp->mip_filter = mip_filter_linear_aniso;
3252
3253 /* Override min_img_filter:
3254 * min_img_filter needs to be set to NEAREST since we need to access
3255 * each texture pixel as it is and weight it later; using linear
3256 * filters will have incorrect results.
3257 * By setting the filter to NEAREST here, we can avoid calling the
3258 * generic img_filter_2d_nearest in the anisotropic filter function,
3259 * making it possible to use one of the accelerated implementations
3260 */
3261 samp->min_img_filter = PIPE_TEX_FILTER_NEAREST;
3262
3263 /* on first access create the lookup table containing the filter weights. */
3264 if (!weightLut) {
3265 create_filter_table();
3266 }
3267 }
3268 break;
3269 }
3270 if (samp->min_img_filter == sampler->mag_img_filter) {
3271 samp->min_mag_equal = TRUE;
3272 }
3273
3274 return (void *)samp;
3275 }
3276
3277
3278 compute_lambda_func
3279 softpipe_get_lambda_func(const struct pipe_sampler_view *view, unsigned shader)
3280 {
3281 if (shader != PIPE_SHADER_FRAGMENT)
3282 return compute_lambda_vert;
3283
3284 switch (view->target) {
3285 case PIPE_BUFFER:
3286 case PIPE_TEXTURE_1D:
3287 case PIPE_TEXTURE_1D_ARRAY:
3288 return compute_lambda_1d;
3289 case PIPE_TEXTURE_2D:
3290 case PIPE_TEXTURE_2D_ARRAY:
3291 case PIPE_TEXTURE_RECT:
3292 case PIPE_TEXTURE_CUBE:
3293 case PIPE_TEXTURE_CUBE_ARRAY:
3294 return compute_lambda_2d;
3295 case PIPE_TEXTURE_3D:
3296 return compute_lambda_3d;
3297 default:
3298 assert(0);
3299 return compute_lambda_1d;
3300 }
3301 }
3302
3303
3304 struct pipe_sampler_view *
3305 softpipe_create_sampler_view(struct pipe_context *pipe,
3306 struct pipe_resource *resource,
3307 const struct pipe_sampler_view *templ)
3308 {
3309 struct sp_sampler_view *sview = CALLOC_STRUCT(sp_sampler_view);
3310 struct softpipe_resource *spr = (struct softpipe_resource *)resource;
3311
3312 if (sview) {
3313 struct pipe_sampler_view *view = &sview->base;
3314 *view = *templ;
3315 view->reference.count = 1;
3316 view->texture = NULL;
3317 pipe_resource_reference(&view->texture, resource);
3318 view->context = pipe;
3319
3320 #ifdef DEBUG
3321 /*
3322 * This is possibly too lenient, but the primary reason is just
3323 * to catch state trackers which forget to initialize this, so
3324 * it only catches clearly impossible view targets.
3325 */
3326 if (view->target != resource->target) {
3327 if (view->target == PIPE_TEXTURE_1D)
3328 assert(resource->target == PIPE_TEXTURE_1D_ARRAY);
3329 else if (view->target == PIPE_TEXTURE_1D_ARRAY)
3330 assert(resource->target == PIPE_TEXTURE_1D);
3331 else if (view->target == PIPE_TEXTURE_2D)
3332 assert(resource->target == PIPE_TEXTURE_2D_ARRAY ||
3333 resource->target == PIPE_TEXTURE_CUBE ||
3334 resource->target == PIPE_TEXTURE_CUBE_ARRAY);
3335 else if (view->target == PIPE_TEXTURE_2D_ARRAY)
3336 assert(resource->target == PIPE_TEXTURE_2D ||
3337 resource->target == PIPE_TEXTURE_CUBE ||
3338 resource->target == PIPE_TEXTURE_CUBE_ARRAY);
3339 else if (view->target == PIPE_TEXTURE_CUBE)
3340 assert(resource->target == PIPE_TEXTURE_CUBE_ARRAY ||
3341 resource->target == PIPE_TEXTURE_2D_ARRAY);
3342 else if (view->target == PIPE_TEXTURE_CUBE_ARRAY)
3343 assert(resource->target == PIPE_TEXTURE_CUBE ||
3344 resource->target == PIPE_TEXTURE_2D_ARRAY);
3345 else
3346 assert(0);
3347 }
3348 #endif
3349
3350 if (any_swizzle(view)) {
3351 sview->need_swizzle = TRUE;
3352 }
3353
3354 if (view->target == PIPE_TEXTURE_CUBE ||
3355 view->target == PIPE_TEXTURE_CUBE_ARRAY)
3356 sview->get_samples = sample_cube;
3357 else {
3358 sview->get_samples = sample_mip;
3359 }
3360 sview->pot2d = spr->pot &&
3361 (view->target == PIPE_TEXTURE_2D ||
3362 view->target == PIPE_TEXTURE_RECT);
3363
3364 sview->xpot = util_logbase2( resource->width0 );
3365 sview->ypot = util_logbase2( resource->height0 );
3366 }
3367
3368 return (struct pipe_sampler_view *) sview;
3369 }
3370
3371
3372 static void
3373 sp_tgsi_get_dims(struct tgsi_sampler *tgsi_sampler,
3374 const unsigned sview_index,
3375 int level, int dims[4])
3376 {
3377 struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
3378
3379 assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
3380 /* always have a view here but texture is NULL if no sampler view was set. */
3381 if (!sp_samp->sp_sview[sview_index].base.texture) {
3382 dims[0] = dims[1] = dims[2] = dims[3] = 0;
3383 return;
3384 }
3385 sp_get_dims(&sp_samp->sp_sview[sview_index], level, dims);
3386 }
3387
3388
3389 static void
3390 sp_tgsi_get_samples(struct tgsi_sampler *tgsi_sampler,
3391 const unsigned sview_index,
3392 const unsigned sampler_index,
3393 const float s[TGSI_QUAD_SIZE],
3394 const float t[TGSI_QUAD_SIZE],
3395 const float p[TGSI_QUAD_SIZE],
3396 const float c0[TGSI_QUAD_SIZE],
3397 const float lod[TGSI_QUAD_SIZE],
3398 float derivs[3][2][TGSI_QUAD_SIZE],
3399 const int8_t offset[3],
3400 enum tgsi_sampler_control control,
3401 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
3402 {
3403 struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
3404 struct filter_args filt_args;
3405 assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
3406 assert(sampler_index < PIPE_MAX_SAMPLERS);
3407 assert(sp_samp->sp_sampler[sampler_index]);
3408 /* always have a view here but texture is NULL if no sampler view was set. */
3409 if (!sp_samp->sp_sview[sview_index].base.texture) {
3410 int i, j;
3411 for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
3412 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3413 rgba[j][i] = 0.0f;
3414 }
3415 }
3416 return;
3417 }
3418
3419 filt_args.control = control;
3420 filt_args.offset = offset;
3421 sp_samp->sp_sview[sview_index].get_samples(&sp_samp->sp_sview[sview_index],
3422 sp_samp->sp_sampler[sampler_index],
3423 s, t, p, c0, lod, &filt_args, rgba);
3424 }
3425
3426
3427 static void
3428 sp_tgsi_get_texel(struct tgsi_sampler *tgsi_sampler,
3429 const unsigned sview_index,
3430 const int i[TGSI_QUAD_SIZE],
3431 const int j[TGSI_QUAD_SIZE], const int k[TGSI_QUAD_SIZE],
3432 const int lod[TGSI_QUAD_SIZE], const int8_t offset[3],
3433 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
3434 {
3435 struct sp_tgsi_sampler *sp_samp = (struct sp_tgsi_sampler *)tgsi_sampler;
3436
3437 assert(sview_index < PIPE_MAX_SHADER_SAMPLER_VIEWS);
3438 /* always have a view here but texture is NULL if no sampler view was set. */
3439 if (!sp_samp->sp_sview[sview_index].base.texture) {
3440 int i, j;
3441 for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
3442 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3443 rgba[j][i] = 0.0f;
3444 }
3445 }
3446 return;
3447 }
3448 sp_get_texels(&sp_samp->sp_sview[sview_index], i, j, k, lod, offset, rgba);
3449 }
3450
3451
3452 struct sp_tgsi_sampler *
3453 sp_create_tgsi_sampler(void)
3454 {
3455 struct sp_tgsi_sampler *samp = CALLOC_STRUCT(sp_tgsi_sampler);
3456 if (!samp)
3457 return NULL;
3458
3459 samp->base.get_dims = sp_tgsi_get_dims;
3460 samp->base.get_samples = sp_tgsi_get_samples;
3461 samp->base.get_texel = sp_tgsi_get_texel;
3462
3463 return samp;
3464 }
3465