mesa: Don't bind DRAW/READ_FRAMEBUFFER separately without FBO blit support
[mesa.git] / src / gallium / drivers / llvmpipe / lp_tex_sample_c.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 * Copyright 2008 VMware, Inc. All rights reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * Texture sampling
31 *
32 * Authors:
33 * Brian Paul
34 */
35
36 #include "lp_context.h"
37 #include "lp_quad.h"
38 #include "lp_texture.h"
39 #include "lp_tex_sample.h"
40 #include "lp_tex_cache.h"
41 #include "pipe/p_context.h"
42 #include "pipe/p_defines.h"
43 #include "pipe/p_shader_tokens.h"
44 #include "util/u_math.h"
45 #include "util/u_memory.h"
46
47
48
/*
 * FRAC(x) is x minus util_ifloor(x).
 *
 * This has to work perfectly: any error shows up as 1-pixel bands of
 * improperly weighted texels under linear filtering (the tests/texwrap.c
 * demo is a good test).
 * Because the floor is what gets subtracted, a negative x does not yield
 * its true fractional part; for x < 0, FRAC(x) = 1 - true_frac(x).
 * The argument is expanded twice, so it must be free of side effects.
 */
#define FRAC(x) ((x) - util_ifloor(x))
57
58
59 /**
60 * Linear interpolation macro
61 */
62 static INLINE float
63 lerp(float a, float v0, float v1)
64 {
65 return v0 + a * (v1 - v0);
66 }
67
68
69 /**
70 * Do 2D/biliner interpolation of float values.
71 * v00, v10, v01 and v11 are typically four texture samples in a square/box.
72 * a and b are the horizontal and vertical interpolants.
73 * It's important that this function is inlined when compiled with
74 * optimization! If we find that's not true on some systems, convert
75 * to a macro.
76 */
77 static INLINE float
78 lerp_2d(float a, float b,
79 float v00, float v10, float v01, float v11)
80 {
81 const float temp0 = lerp(a, v00, v10);
82 const float temp1 = lerp(a, v01, v11);
83 return lerp(b, temp0, temp1);
84 }
85
86
87 /**
88 * As above, but 3D interpolation of 8 values.
89 */
90 static INLINE float
91 lerp_3d(float a, float b, float c,
92 float v000, float v100, float v010, float v110,
93 float v001, float v101, float v011, float v111)
94 {
95 const float temp0 = lerp_2d(a, b, v000, v100, v010, v110);
96 const float temp1 = lerp_2d(a, b, v001, v101, v011, v111);
97 return lerp(c, temp0, temp1);
98 }
99
100
101
/**
 * Wrap a (possibly negative) signed integer A into [0, B-1], as needed for
 * texture repeat.  C's % truncates toward zero, so A is first reduced to
 * (-B, B), shifted up by B and reduced again.
 * Merely casting A to unsigned is only correct when B is a power of two:
 * -1 wrapped into a size of 3 must give 2, but 0xffffffff % 3 is 0.
 * B is expanded more than once and must be free of side effects; the
 * result has unsigned type.
 */
#define REMAINDER(A, B) \
   ((unsigned) ((((int) (A) % (int) (B)) + (int) (B)) % (int) (B)))
107
108
109 /**
110 * Apply texture coord wrapping mode and return integer texture indexes
111 * for a vector of four texcoords (S or T or P).
112 * \param wrapMode PIPE_TEX_WRAP_x
113 * \param s the incoming texcoords
114 * \param size the texture image size
115 * \param icoord returns the integer texcoords
116 * \return integer texture index
117 */
118 static INLINE void
119 nearest_texcoord_4(unsigned wrapMode, const float s[4], unsigned size,
120 int icoord[4])
121 {
122 uint ch;
123 switch (wrapMode) {
124 case PIPE_TEX_WRAP_REPEAT:
125 /* s limited to [0,1) */
126 /* i limited to [0,size-1] */
127 for (ch = 0; ch < 4; ch++) {
128 int i = util_ifloor(s[ch] * size);
129 icoord[ch] = REMAINDER(i, size);
130 }
131 return;
132 case PIPE_TEX_WRAP_CLAMP:
133 /* s limited to [0,1] */
134 /* i limited to [0,size-1] */
135 for (ch = 0; ch < 4; ch++) {
136 if (s[ch] <= 0.0F)
137 icoord[ch] = 0;
138 else if (s[ch] >= 1.0F)
139 icoord[ch] = size - 1;
140 else
141 icoord[ch] = util_ifloor(s[ch] * size);
142 }
143 return;
144 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
145 {
146 /* s limited to [min,max] */
147 /* i limited to [0, size-1] */
148 const float min = 1.0F / (2.0F * size);
149 const float max = 1.0F - min;
150 for (ch = 0; ch < 4; ch++) {
151 if (s[ch] < min)
152 icoord[ch] = 0;
153 else if (s[ch] > max)
154 icoord[ch] = size - 1;
155 else
156 icoord[ch] = util_ifloor(s[ch] * size);
157 }
158 }
159 return;
160 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
161 {
162 /* s limited to [min,max] */
163 /* i limited to [-1, size] */
164 const float min = -1.0F / (2.0F * size);
165 const float max = 1.0F - min;
166 for (ch = 0; ch < 4; ch++) {
167 if (s[ch] <= min)
168 icoord[ch] = -1;
169 else if (s[ch] >= max)
170 icoord[ch] = size;
171 else
172 icoord[ch] = util_ifloor(s[ch] * size);
173 }
174 }
175 return;
176 case PIPE_TEX_WRAP_MIRROR_REPEAT:
177 {
178 const float min = 1.0F / (2.0F * size);
179 const float max = 1.0F - min;
180 for (ch = 0; ch < 4; ch++) {
181 const int flr = util_ifloor(s[ch]);
182 float u;
183 if (flr & 1)
184 u = 1.0F - (s[ch] - (float) flr);
185 else
186 u = s[ch] - (float) flr;
187 if (u < min)
188 icoord[ch] = 0;
189 else if (u > max)
190 icoord[ch] = size - 1;
191 else
192 icoord[ch] = util_ifloor(u * size);
193 }
194 }
195 return;
196 case PIPE_TEX_WRAP_MIRROR_CLAMP:
197 for (ch = 0; ch < 4; ch++) {
198 /* s limited to [0,1] */
199 /* i limited to [0,size-1] */
200 const float u = fabsf(s[ch]);
201 if (u <= 0.0F)
202 icoord[ch] = 0;
203 else if (u >= 1.0F)
204 icoord[ch] = size - 1;
205 else
206 icoord[ch] = util_ifloor(u * size);
207 }
208 return;
209 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
210 {
211 /* s limited to [min,max] */
212 /* i limited to [0, size-1] */
213 const float min = 1.0F / (2.0F * size);
214 const float max = 1.0F - min;
215 for (ch = 0; ch < 4; ch++) {
216 const float u = fabsf(s[ch]);
217 if (u < min)
218 icoord[ch] = 0;
219 else if (u > max)
220 icoord[ch] = size - 1;
221 else
222 icoord[ch] = util_ifloor(u * size);
223 }
224 }
225 return;
226 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
227 {
228 /* s limited to [min,max] */
229 /* i limited to [0, size-1] */
230 const float min = -1.0F / (2.0F * size);
231 const float max = 1.0F - min;
232 for (ch = 0; ch < 4; ch++) {
233 const float u = fabsf(s[ch]);
234 if (u < min)
235 icoord[ch] = -1;
236 else if (u > max)
237 icoord[ch] = size;
238 else
239 icoord[ch] = util_ifloor(u * size);
240 }
241 }
242 return;
243 default:
244 assert(0);
245 }
246 }
247
248
249 /**
250 * Used to compute texel locations for linear sampling for four texcoords.
251 * \param wrapMode PIPE_TEX_WRAP_x
252 * \param s the texcoords
253 * \param size the texture image size
254 * \param icoord0 returns first texture indexes
255 * \param icoord1 returns second texture indexes (usually icoord0 + 1)
256 * \param w returns blend factor/weight between texture indexes
257 * \param icoord returns the computed integer texture coords
258 */
259 static INLINE void
260 linear_texcoord_4(unsigned wrapMode, const float s[4], unsigned size,
261 int icoord0[4], int icoord1[4], float w[4])
262 {
263 uint ch;
264
265 switch (wrapMode) {
266 case PIPE_TEX_WRAP_REPEAT:
267 for (ch = 0; ch < 4; ch++) {
268 float u = s[ch] * size - 0.5F;
269 icoord0[ch] = REMAINDER(util_ifloor(u), size);
270 icoord1[ch] = REMAINDER(icoord0[ch] + 1, size);
271 w[ch] = FRAC(u);
272 }
273 break;;
274 case PIPE_TEX_WRAP_CLAMP:
275 for (ch = 0; ch < 4; ch++) {
276 float u = CLAMP(s[ch], 0.0F, 1.0F);
277 u = u * size - 0.5f;
278 icoord0[ch] = util_ifloor(u);
279 icoord1[ch] = icoord0[ch] + 1;
280 w[ch] = FRAC(u);
281 }
282 break;;
283 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
284 for (ch = 0; ch < 4; ch++) {
285 float u = CLAMP(s[ch], 0.0F, 1.0F);
286 u = u * size - 0.5f;
287 icoord0[ch] = util_ifloor(u);
288 icoord1[ch] = icoord0[ch] + 1;
289 if (icoord0[ch] < 0)
290 icoord0[ch] = 0;
291 if (icoord1[ch] >= (int) size)
292 icoord1[ch] = size - 1;
293 w[ch] = FRAC(u);
294 }
295 break;;
296 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
297 {
298 const float min = -1.0F / (2.0F * size);
299 const float max = 1.0F - min;
300 for (ch = 0; ch < 4; ch++) {
301 float u = CLAMP(s[ch], min, max);
302 u = u * size - 0.5f;
303 icoord0[ch] = util_ifloor(u);
304 icoord1[ch] = icoord0[ch] + 1;
305 w[ch] = FRAC(u);
306 }
307 }
308 break;;
309 case PIPE_TEX_WRAP_MIRROR_REPEAT:
310 for (ch = 0; ch < 4; ch++) {
311 const int flr = util_ifloor(s[ch]);
312 float u;
313 if (flr & 1)
314 u = 1.0F - (s[ch] - (float) flr);
315 else
316 u = s[ch] - (float) flr;
317 u = u * size - 0.5F;
318 icoord0[ch] = util_ifloor(u);
319 icoord1[ch] = icoord0[ch] + 1;
320 if (icoord0[ch] < 0)
321 icoord0[ch] = 0;
322 if (icoord1[ch] >= (int) size)
323 icoord1[ch] = size - 1;
324 w[ch] = FRAC(u);
325 }
326 break;;
327 case PIPE_TEX_WRAP_MIRROR_CLAMP:
328 for (ch = 0; ch < 4; ch++) {
329 float u = fabsf(s[ch]);
330 if (u >= 1.0F)
331 u = (float) size;
332 else
333 u *= size;
334 u -= 0.5F;
335 icoord0[ch] = util_ifloor(u);
336 icoord1[ch] = icoord0[ch] + 1;
337 w[ch] = FRAC(u);
338 }
339 break;;
340 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
341 for (ch = 0; ch < 4; ch++) {
342 float u = fabsf(s[ch]);
343 if (u >= 1.0F)
344 u = (float) size;
345 else
346 u *= size;
347 u -= 0.5F;
348 icoord0[ch] = util_ifloor(u);
349 icoord1[ch] = icoord0[ch] + 1;
350 if (icoord0[ch] < 0)
351 icoord0[ch] = 0;
352 if (icoord1[ch] >= (int) size)
353 icoord1[ch] = size - 1;
354 w[ch] = FRAC(u);
355 }
356 break;;
357 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
358 {
359 const float min = -1.0F / (2.0F * size);
360 const float max = 1.0F - min;
361 for (ch = 0; ch < 4; ch++) {
362 float u = fabsf(s[ch]);
363 if (u <= min)
364 u = min * size;
365 else if (u >= max)
366 u = max * size;
367 else
368 u *= size;
369 u -= 0.5F;
370 icoord0[ch] = util_ifloor(u);
371 icoord1[ch] = icoord0[ch] + 1;
372 w[ch] = FRAC(u);
373 }
374 }
375 break;;
376 default:
377 assert(0);
378 }
379 }
380
381
382 /**
383 * For RECT textures / unnormalized texcoords
384 * Only a subset of wrap modes supported.
385 */
386 static INLINE void
387 nearest_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size,
388 int icoord[4])
389 {
390 uint ch;
391 switch (wrapMode) {
392 case PIPE_TEX_WRAP_CLAMP:
393 for (ch = 0; ch < 4; ch++) {
394 int i = util_ifloor(s[ch]);
395 icoord[ch]= CLAMP(i, 0, (int) size-1);
396 }
397 return;
398 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
399 /* fall-through */
400 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
401 for (ch = 0; ch < 4; ch++) {
402 icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) );
403 }
404 return;
405 default:
406 assert(0);
407 }
408 }
409
410
411 /**
412 * For RECT textures / unnormalized texcoords.
413 * Only a subset of wrap modes supported.
414 */
415 static INLINE void
416 linear_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size,
417 int icoord0[4], int icoord1[4], float w[4])
418 {
419 uint ch;
420 switch (wrapMode) {
421 case PIPE_TEX_WRAP_CLAMP:
422 for (ch = 0; ch < 4; ch++) {
423 /* Not exactly what the spec says, but it matches NVIDIA output */
424 float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f);
425 icoord0[ch] = util_ifloor(u);
426 icoord1[ch] = icoord0[ch] + 1;
427 w[ch] = FRAC(u);
428 }
429 return;
430 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
431 /* fall-through */
432 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
433 for (ch = 0; ch < 4; ch++) {
434 float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F);
435 u -= 0.5F;
436 icoord0[ch] = util_ifloor(u);
437 icoord1[ch] = icoord0[ch] + 1;
438 if (icoord1[ch] > (int) size - 1)
439 icoord1[ch] = size - 1;
440 w[ch] = FRAC(u);
441 }
442 break;
443 default:
444 assert(0);
445 }
446 }
447
448
449 static unsigned
450 choose_cube_face(float rx, float ry, float rz, float *newS, float *newT)
451 {
452 /*
453 major axis
454 direction target sc tc ma
455 ---------- ------------------------------- --- --- ---
456 +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx
457 -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx
458 +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry
459 -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry
460 +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz
461 -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz
462 */
463 const float arx = fabsf(rx), ary = fabsf(ry), arz = fabsf(rz);
464 unsigned face;
465 float sc, tc, ma;
466
467 if (arx > ary && arx > arz) {
468 if (rx >= 0.0F) {
469 face = PIPE_TEX_FACE_POS_X;
470 sc = -rz;
471 tc = -ry;
472 ma = arx;
473 }
474 else {
475 face = PIPE_TEX_FACE_NEG_X;
476 sc = rz;
477 tc = -ry;
478 ma = arx;
479 }
480 }
481 else if (ary > arx && ary > arz) {
482 if (ry >= 0.0F) {
483 face = PIPE_TEX_FACE_POS_Y;
484 sc = rx;
485 tc = rz;
486 ma = ary;
487 }
488 else {
489 face = PIPE_TEX_FACE_NEG_Y;
490 sc = rx;
491 tc = -rz;
492 ma = ary;
493 }
494 }
495 else {
496 if (rz > 0.0F) {
497 face = PIPE_TEX_FACE_POS_Z;
498 sc = rx;
499 tc = -ry;
500 ma = arz;
501 }
502 else {
503 face = PIPE_TEX_FACE_NEG_Z;
504 sc = -rx;
505 tc = -ry;
506 ma = arz;
507 }
508 }
509
510 *newS = ( sc / ma + 1.0F ) * 0.5F;
511 *newT = ( tc / ma + 1.0F ) * 0.5F;
512
513 return face;
514 }
515
516
517 /**
518 * Examine the quad's texture coordinates to compute the partial
519 * derivatives w.r.t X and Y, then compute lambda (level of detail).
520 *
521 * This is only done for fragment shaders, not vertex shaders.
522 */
523 static float
524 compute_lambda(struct tgsi_sampler *tgsi_sampler,
525 const float s[QUAD_SIZE],
526 const float t[QUAD_SIZE],
527 const float p[QUAD_SIZE],
528 float lodbias)
529 {
530 const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
531 const struct pipe_texture *texture = samp->texture;
532 const struct pipe_sampler_state *sampler = samp->sampler;
533 float rho, lambda;
534
535 if (samp->processor == TGSI_PROCESSOR_VERTEX)
536 return lodbias;
537
538 assert(sampler->normalized_coords);
539
540 assert(s);
541 {
542 float dsdx = s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT];
543 float dsdy = s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT];
544 dsdx = fabsf(dsdx);
545 dsdy = fabsf(dsdy);
546 rho = MAX2(dsdx, dsdy) * texture->width[0];
547 }
548 if (t) {
549 float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT];
550 float dtdy = t[QUAD_TOP_LEFT] - t[QUAD_BOTTOM_LEFT];
551 float max;
552 dtdx = fabsf(dtdx);
553 dtdy = fabsf(dtdy);
554 max = MAX2(dtdx, dtdy) * texture->height[0];
555 rho = MAX2(rho, max);
556 }
557 if (p) {
558 float dpdx = p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT];
559 float dpdy = p[QUAD_TOP_LEFT] - p[QUAD_BOTTOM_LEFT];
560 float max;
561 dpdx = fabsf(dpdx);
562 dpdy = fabsf(dpdy);
563 max = MAX2(dpdx, dpdy) * texture->depth[0];
564 rho = MAX2(rho, max);
565 }
566
567 lambda = util_fast_log2(rho);
568 lambda += lodbias + sampler->lod_bias;
569 lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod);
570
571 return lambda;
572 }
573
574
575 /**
576 * Do several things here:
577 * 1. Compute lambda from the texcoords, if needed
578 * 2. Determine if we're minifying or magnifying
579 * 3. If minifying, choose mipmap levels
580 * 4. Return image filter to use within mipmap images
581 * \param level0 Returns first mipmap level to sample from
582 * \param level1 Returns second mipmap level to sample from
583 * \param levelBlend Returns blend factor between levels, in [0,1]
584 * \param imgFilter Returns either the min or mag filter, depending on lambda
585 */
586 static void
587 choose_mipmap_levels(struct tgsi_sampler *tgsi_sampler,
588 const float s[QUAD_SIZE],
589 const float t[QUAD_SIZE],
590 const float p[QUAD_SIZE],
591 float lodbias,
592 unsigned *level0, unsigned *level1, float *levelBlend,
593 unsigned *imgFilter)
594 {
595 const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
596 const struct pipe_texture *texture = samp->texture;
597 const struct pipe_sampler_state *sampler = samp->sampler;
598
599 if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
600 /* no mipmap selection needed */
601 *level0 = *level1 = CLAMP((int) sampler->min_lod,
602 0, (int) texture->last_level);
603
604 if (sampler->min_img_filter != sampler->mag_img_filter) {
605 /* non-mipmapped texture, but still need to determine if doing
606 * minification or magnification.
607 */
608 float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias);
609 if (lambda <= 0.0) {
610 *imgFilter = sampler->mag_img_filter;
611 }
612 else {
613 *imgFilter = sampler->min_img_filter;
614 }
615 }
616 else {
617 *imgFilter = sampler->mag_img_filter;
618 }
619 }
620 else {
621 float lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias);
622
623 if (lambda <= 0.0) { /* XXX threshold depends on the filter */
624 /* magnifying */
625 *imgFilter = sampler->mag_img_filter;
626 *level0 = *level1 = 0;
627 }
628 else {
629 /* minifying */
630 *imgFilter = sampler->min_img_filter;
631
632 /* choose mipmap level(s) and compute the blend factor between them */
633 if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
634 /* Nearest mipmap level */
635 const int lvl = (int) (lambda + 0.5);
636 *level0 =
637 *level1 = CLAMP(lvl, 0, (int) texture->last_level);
638 }
639 else {
640 /* Linear interpolation between mipmap levels */
641 const int lvl = (int) lambda;
642 *level0 = CLAMP(lvl, 0, (int) texture->last_level);
643 *level1 = CLAMP(lvl + 1, 0, (int) texture->last_level);
644 *levelBlend = FRAC(lambda); /* blending weight between levels */
645 }
646 }
647 }
648 }
649
650
651 /**
652 * Get a texel from a texture, using the texture tile cache.
653 *
654 * \param face the cube face in 0..5
655 * \param level the mipmap level
656 * \param x the x coord of texel within 2D image
657 * \param y the y coord of texel within 2D image
658 * \param z which slice of a 3D texture
659 * \param rgba the quad to put the texel/color into
660 * \param j which element of the rgba quad to write to
661 *
662 * XXX maybe move this into lp_tile_cache.c and merge with the
663 * lp_get_cached_tile_tex() function. Also, get 4 texels instead of 1...
664 */
665 static void
666 get_texel_quad_2d(const struct tgsi_sampler *tgsi_sampler,
667 unsigned face, unsigned level, int x, int y,
668 const uint8_t *out[4])
669 {
670 const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
671
672 const struct llvmpipe_cached_tex_tile *tile
673 = lp_get_cached_tex_tile(samp->cache,
674 tex_tile_address(x, y, 0, face, level));
675
676 y %= TEX_TILE_SIZE;
677 x %= TEX_TILE_SIZE;
678
679 out[0] = &tile->color[y ][x ][0];
680 out[1] = &tile->color[y ][x+1][0];
681 out[2] = &tile->color[y+1][x ][0];
682 out[3] = &tile->color[y+1][x+1][0];
683 }
684
685 static INLINE const uint8_t *
686 get_texel_2d_ptr(const struct tgsi_sampler *tgsi_sampler,
687 unsigned face, unsigned level, int x, int y)
688 {
689 const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
690
691 const struct llvmpipe_cached_tex_tile *tile
692 = lp_get_cached_tex_tile(samp->cache,
693 tex_tile_address(x, y, 0, face, level));
694
695 y %= TEX_TILE_SIZE;
696 x %= TEX_TILE_SIZE;
697
698 return &tile->color[y][x][0];
699 }
700
701
/**
 * Get pointers to four texels that may live in different tiles, doing one
 * tile lookup per texel.
 * \param out  returns pointers to texels (x0,y0), (x1,y0), (x0,y1), (x1,y1)
 */
static void
get_texel_quad_2d_mt(const struct tgsi_sampler *tgsi_sampler,
                     unsigned face, unsigned level,
                     int x0, int y0,
                     int x1, int y1,
                     const uint8_t *out[4])
{
   out[0] = get_texel_2d_ptr( tgsi_sampler, face, level, x0, y0 );
   out[1] = get_texel_2d_ptr( tgsi_sampler, face, level, x1, y0 );
   out[2] = get_texel_2d_ptr( tgsi_sampler, face, level, x0, y1 );
   out[3] = get_texel_2d_ptr( tgsi_sampler, face, level, x1, y1 );
}
718
719 static void
720 get_texel(const struct tgsi_sampler *tgsi_sampler,
721 unsigned face, unsigned level, int x, int y, int z,
722 float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j)
723 {
724 const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
725 const struct pipe_texture *texture = samp->texture;
726 const struct pipe_sampler_state *sampler = samp->sampler;
727
728 if (x < 0 || x >= (int) texture->width[level] ||
729 y < 0 || y >= (int) texture->height[level] ||
730 z < 0 || z >= (int) texture->depth[level]) {
731 rgba[0][j] = sampler->border_color[0];
732 rgba[1][j] = sampler->border_color[1];
733 rgba[2][j] = sampler->border_color[2];
734 rgba[3][j] = sampler->border_color[3];
735 }
736 else {
737 const unsigned tx = x % TEX_TILE_SIZE;
738 const unsigned ty = y % TEX_TILE_SIZE;
739 const struct llvmpipe_cached_tex_tile *tile;
740
741 tile = lp_get_cached_tex_tile(samp->cache,
742 tex_tile_address(x, y, z, face, level));
743
744 rgba[0][j] = ubyte_to_float(tile->color[ty][tx][0]);
745 rgba[1][j] = ubyte_to_float(tile->color[ty][tx][1]);
746 rgba[2][j] = ubyte_to_float(tile->color[ty][tx][2]);
747 rgba[3][j] = ubyte_to_float(tile->color[ty][tx][3]);
748 if (0)
749 {
750 debug_printf("Get texel %f %f %f %f from %s\n",
751 rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j],
752 pf_name(texture->format));
753 }
754 }
755 }
756
757
758 /**
759 * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
760 * When we sampled the depth texture, the depth value was put into all
761 * RGBA channels. We look at the red channel here.
762 * \param rgba quad of (depth) texel values
763 * \param p texture 'P' components for four pixels in quad
764 * \param j which pixel in the quad to test [0..3]
765 */
766 static INLINE void
767 shadow_compare(const struct pipe_sampler_state *sampler,
768 float rgba[NUM_CHANNELS][QUAD_SIZE],
769 const float p[QUAD_SIZE],
770 uint j)
771 {
772 int k;
773 switch (sampler->compare_func) {
774 case PIPE_FUNC_LESS:
775 k = p[j] < rgba[0][j];
776 break;
777 case PIPE_FUNC_LEQUAL:
778 k = p[j] <= rgba[0][j];
779 break;
780 case PIPE_FUNC_GREATER:
781 k = p[j] > rgba[0][j];
782 break;
783 case PIPE_FUNC_GEQUAL:
784 k = p[j] >= rgba[0][j];
785 break;
786 case PIPE_FUNC_EQUAL:
787 k = p[j] == rgba[0][j];
788 break;
789 case PIPE_FUNC_NOTEQUAL:
790 k = p[j] != rgba[0][j];
791 break;
792 case PIPE_FUNC_ALWAYS:
793 k = 1;
794 break;
795 case PIPE_FUNC_NEVER:
796 k = 0;
797 break;
798 default:
799 k = 0;
800 assert(0);
801 break;
802 }
803
804 /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
805 rgba[0][j] = rgba[1][j] = rgba[2][j] = (float) k;
806 rgba[3][j] = 1.0F;
807 }
808
809
810 /**
811 * As above, but do four z/texture comparisons.
812 */
813 static INLINE void
814 shadow_compare4(const struct pipe_sampler_state *sampler,
815 float rgba[NUM_CHANNELS][QUAD_SIZE],
816 const float p[QUAD_SIZE])
817 {
818 int j, k0, k1, k2, k3;
819 float val;
820
821 /* compare four texcoords vs. four texture samples */
822 switch (sampler->compare_func) {
823 case PIPE_FUNC_LESS:
824 k0 = p[0] < rgba[0][0];
825 k1 = p[1] < rgba[0][1];
826 k2 = p[2] < rgba[0][2];
827 k3 = p[3] < rgba[0][3];
828 break;
829 case PIPE_FUNC_LEQUAL:
830 k0 = p[0] <= rgba[0][0];
831 k1 = p[1] <= rgba[0][1];
832 k2 = p[2] <= rgba[0][2];
833 k3 = p[3] <= rgba[0][3];
834 break;
835 case PIPE_FUNC_GREATER:
836 k0 = p[0] > rgba[0][0];
837 k1 = p[1] > rgba[0][1];
838 k2 = p[2] > rgba[0][2];
839 k3 = p[3] > rgba[0][3];
840 break;
841 case PIPE_FUNC_GEQUAL:
842 k0 = p[0] >= rgba[0][0];
843 k1 = p[1] >= rgba[0][1];
844 k2 = p[2] >= rgba[0][2];
845 k3 = p[3] >= rgba[0][3];
846 break;
847 case PIPE_FUNC_EQUAL:
848 k0 = p[0] == rgba[0][0];
849 k1 = p[1] == rgba[0][1];
850 k2 = p[2] == rgba[0][2];
851 k3 = p[3] == rgba[0][3];
852 break;
853 case PIPE_FUNC_NOTEQUAL:
854 k0 = p[0] != rgba[0][0];
855 k1 = p[1] != rgba[0][1];
856 k2 = p[2] != rgba[0][2];
857 k3 = p[3] != rgba[0][3];
858 break;
859 case PIPE_FUNC_ALWAYS:
860 k0 = k1 = k2 = k3 = 1;
861 break;
862 case PIPE_FUNC_NEVER:
863 k0 = k1 = k2 = k3 = 0;
864 break;
865 default:
866 k0 = k1 = k2 = k3 = 0;
867 assert(0);
868 break;
869 }
870
871 /* convert four pass/fail values to an intensity in [0,1] */
872 val = 0.25F * (k0 + k1 + k2 + k3);
873
874 /* XXX returning result for default GL_DEPTH_TEXTURE_MODE = GL_LUMINANCE */
875 for (j = 0; j < 4; j++) {
876 rgba[0][j] = rgba[1][j] = rgba[2][j] = val;
877 rgba[3][j] = 1.0F;
878 }
879 }
880
881
882
883 static void
884 lp_get_samples_2d_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
885 const float s[QUAD_SIZE],
886 const float t[QUAD_SIZE],
887 const float p[QUAD_SIZE],
888 float lodbias,
889 float rgba[NUM_CHANNELS][QUAD_SIZE])
890 {
891 const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
892 unsigned j;
893 unsigned level = samp->level;
894 unsigned xpot = 1 << (samp->xpot - level);
895 unsigned ypot = 1 << (samp->ypot - level);
896 unsigned xmax = (xpot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, xpot) - 1; */
897 unsigned ymax = (ypot - 1) & (TEX_TILE_SIZE - 1); /* MIN2(TEX_TILE_SIZE, ypot) - 1; */
898
899 for (j = 0; j < QUAD_SIZE; j++) {
900 int c;
901
902 float u = s[j] * xpot - 0.5F;
903 float v = t[j] * ypot - 0.5F;
904
905 int uflr = util_ifloor(u);
906 int vflr = util_ifloor(v);
907
908 float xw = u - (float)uflr;
909 float yw = v - (float)vflr;
910
911 int x0 = uflr & (xpot - 1);
912 int y0 = vflr & (ypot - 1);
913
914 const uint8_t *tx[4];
915
916
917 /* Can we fetch all four at once:
918 */
919 if (x0 < xmax && y0 < ymax)
920 {
921 get_texel_quad_2d(tgsi_sampler, 0, level, x0, y0, tx);
922 }
923 else
924 {
925 unsigned x1 = (x0 + 1) & (xpot - 1);
926 unsigned y1 = (y0 + 1) & (ypot - 1);
927 get_texel_quad_2d_mt(tgsi_sampler, 0, level,
928 x0, y0, x1, y1, tx);
929 }
930
931
932 /* interpolate R, G, B, A */
933 for (c = 0; c < 4; c++) {
934 rgba[c][j] = lerp_2d(xw, yw,
935 ubyte_to_float(tx[0][c]), ubyte_to_float(tx[1][c]),
936 ubyte_to_float(tx[2][c]), ubyte_to_float(tx[3][c]));
937 }
938 }
939 }
940
941
942 static void
943 lp_get_samples_2d_nearest_repeat_POT(struct tgsi_sampler *tgsi_sampler,
944 const float s[QUAD_SIZE],
945 const float t[QUAD_SIZE],
946 const float p[QUAD_SIZE],
947 float lodbias,
948 float rgba[NUM_CHANNELS][QUAD_SIZE])
949 {
950 const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
951 unsigned j;
952 unsigned level = samp->level;
953 unsigned xpot = 1 << (samp->xpot - level);
954 unsigned ypot = 1 << (samp->ypot - level);
955
956 for (j = 0; j < QUAD_SIZE; j++) {
957 int c;
958
959 float u = s[j] * xpot;
960 float v = t[j] * ypot;
961
962 int uflr = util_ifloor(u);
963 int vflr = util_ifloor(v);
964
965 int x0 = uflr & (xpot - 1);
966 int y0 = vflr & (ypot - 1);
967
968 const uint8_t *out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0);
969
970 for (c = 0; c < 4; c++) {
971 rgba[c][j] = ubyte_to_float(out[c]);
972 }
973 }
974 }
975
976
977 static void
978 lp_get_samples_2d_nearest_clamp_POT(struct tgsi_sampler *tgsi_sampler,
979 const float s[QUAD_SIZE],
980 const float t[QUAD_SIZE],
981 const float p[QUAD_SIZE],
982 float lodbias,
983 float rgba[NUM_CHANNELS][QUAD_SIZE])
984 {
985 const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
986 unsigned j;
987 unsigned level = samp->level;
988 unsigned xpot = 1 << (samp->xpot - level);
989 unsigned ypot = 1 << (samp->ypot - level);
990
991 for (j = 0; j < QUAD_SIZE; j++) {
992 int c;
993
994 float u = s[j] * xpot;
995 float v = t[j] * ypot;
996
997 int x0, y0;
998 const uint8_t *out;
999
1000 x0 = util_ifloor(u);
1001 if (x0 < 0)
1002 x0 = 0;
1003 else if (x0 > xpot - 1)
1004 x0 = xpot - 1;
1005
1006 y0 = util_ifloor(v);
1007 if (y0 < 0)
1008 y0 = 0;
1009 else if (y0 > ypot - 1)
1010 y0 = ypot - 1;
1011
1012 out = get_texel_2d_ptr(tgsi_sampler, 0, level, x0, y0);
1013
1014 for (c = 0; c < 4; c++) {
1015 rgba[c][j] = ubyte_to_float(out[c]);
1016 }
1017 }
1018 }
1019
1020
1021 static void
1022 lp_get_samples_2d_linear_mip_linear_repeat_POT(struct tgsi_sampler *tgsi_sampler,
1023 const float s[QUAD_SIZE],
1024 const float t[QUAD_SIZE],
1025 const float p[QUAD_SIZE],
1026 float lodbias,
1027 float rgba[NUM_CHANNELS][QUAD_SIZE])
1028 {
1029 struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
1030 const struct pipe_texture *texture = samp->texture;
1031 int level0;
1032 float lambda;
1033
1034 lambda = compute_lambda(tgsi_sampler, s, t, p, lodbias);
1035 level0 = (int)lambda;
1036
1037 if (lambda < 0.0) {
1038 samp->level = 0;
1039 lp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
1040 s, t, p, 0, rgba );
1041 }
1042 else if (level0 >= texture->last_level) {
1043 samp->level = texture->last_level;
1044 lp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
1045 s, t, p, 0, rgba );
1046 }
1047 else {
1048 float levelBlend = lambda - level0;
1049 float rgba0[4][4];
1050 float rgba1[4][4];
1051 int c,j;
1052
1053 samp->level = level0;
1054 lp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
1055 s, t, p, 0, rgba0 );
1056
1057 samp->level = level0+1;
1058 lp_get_samples_2d_linear_repeat_POT( tgsi_sampler,
1059 s, t, p, 0, rgba1 );
1060
1061 for (j = 0; j < QUAD_SIZE; j++) {
1062 for (c = 0; c < 4; c++) {
1063 rgba[c][j] = lerp(levelBlend, rgba0[c][j], rgba1[c][j]);
1064 }
1065 }
1066 }
1067 }
1068
1069 /**
1070 * Common code for sampling 1D/2D/cube textures.
1071 * Could probably extend for 3D...
1072 */
1073 static void
1074 lp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler,
1075 const float s[QUAD_SIZE],
1076 const float t[QUAD_SIZE],
1077 const float p[QUAD_SIZE],
1078 float lodbias,
1079 float rgba[NUM_CHANNELS][QUAD_SIZE],
1080 const unsigned faces[4])
1081 {
1082 const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
1083 const struct pipe_texture *texture = samp->texture;
1084 const struct pipe_sampler_state *sampler = samp->sampler;
1085 unsigned level0, level1, j, imgFilter;
1086 int width, height;
1087 float levelBlend = 0.0F;
1088
1089 choose_mipmap_levels(tgsi_sampler, s, t, p,
1090 lodbias,
1091 &level0, &level1, &levelBlend, &imgFilter);
1092
1093 assert(sampler->normalized_coords);
1094
1095 width = texture->width[level0];
1096 height = texture->height[level0];
1097
1098 assert(width > 0);
1099
1100 switch (imgFilter) {
1101 case PIPE_TEX_FILTER_NEAREST:
1102 {
1103 int x[4], y[4];
1104 nearest_texcoord_4(sampler->wrap_s, s, width, x);
1105 nearest_texcoord_4(sampler->wrap_t, t, height, y);
1106
1107 for (j = 0; j < QUAD_SIZE; j++) {
1108 get_texel(tgsi_sampler, faces[j], level0, x[j], y[j], 0, rgba, j);
1109 if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
1110 shadow_compare(sampler, rgba, p, j);
1111 }
1112
1113 if (level0 != level1) {
1114 /* get texels from second mipmap level and blend */
1115 float rgba2[4][4];
1116 unsigned c;
1117 x[j] /= 2;
1118 y[j] /= 2;
1119 get_texel(tgsi_sampler, faces[j], level1, x[j], y[j], 0,
1120 rgba2, j);
1121 if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
1122 shadow_compare(sampler, rgba2, p, j);
1123 }
1124
1125 for (c = 0; c < NUM_CHANNELS; c++) {
1126 rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
1127 }
1128 }
1129 }
1130 }
1131 break;
1132 case PIPE_TEX_FILTER_LINEAR:
1133 case PIPE_TEX_FILTER_ANISO:
1134 {
1135 int x0[4], y0[4], x1[4], y1[4];
1136 float xw[4], yw[4]; /* weights */
1137
1138 linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw);
1139 linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw);
1140
1141 for (j = 0; j < QUAD_SIZE; j++) {
1142 float tx[4][4]; /* texels */
1143 int c;
1144 get_texel(tgsi_sampler, faces[j], level0, x0[j], y0[j], 0, tx, 0);
1145 get_texel(tgsi_sampler, faces[j], level0, x1[j], y0[j], 0, tx, 1);
1146 get_texel(tgsi_sampler, faces[j], level0, x0[j], y1[j], 0, tx, 2);
1147 get_texel(tgsi_sampler, faces[j], level0, x1[j], y1[j], 0, tx, 3);
1148 if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
1149 shadow_compare4(sampler, tx, p);
1150 }
1151
1152 /* interpolate R, G, B, A */
1153 for (c = 0; c < 4; c++) {
1154 rgba[c][j] = lerp_2d(xw[j], yw[j],
1155 tx[c][0], tx[c][1],
1156 tx[c][2], tx[c][3]);
1157 }
1158
1159 if (level0 != level1) {
1160 /* get texels from second mipmap level and blend */
1161 float rgba2[4][4];
1162
1163 /* XXX: This is incorrect -- will often end up with (x0
1164 * == x1 && y0 == y1), meaning that we fetch the same
1165 * texel four times and linearly interpolate between
1166 * identical values. The correct approach would be to
1167 * call linear_texcoord again for the second level.
1168 */
1169 x0[j] /= 2;
1170 y0[j] /= 2;
1171 x1[j] /= 2;
1172 y1[j] /= 2;
1173 get_texel(tgsi_sampler, faces[j], level1, x0[j], y0[j], 0, tx, 0);
1174 get_texel(tgsi_sampler, faces[j], level1, x1[j], y0[j], 0, tx, 1);
1175 get_texel(tgsi_sampler, faces[j], level1, x0[j], y1[j], 0, tx, 2);
1176 get_texel(tgsi_sampler, faces[j], level1, x1[j], y1[j], 0, tx, 3);
1177 if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){
1178 shadow_compare4(sampler, tx, p);
1179 }
1180
1181 /* interpolate R, G, B, A */
1182 for (c = 0; c < 4; c++) {
1183 rgba2[c][j] = lerp_2d(xw[j], yw[j],
1184 tx[c][0], tx[c][1], tx[c][2], tx[c][3]);
1185 }
1186
1187 for (c = 0; c < NUM_CHANNELS; c++) {
1188 rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
1189 }
1190 }
1191 }
1192 }
1193 break;
1194 default:
1195 assert(0);
1196 }
1197 }
1198
1199
1200 static INLINE void
1201 lp_get_samples_1d(struct tgsi_sampler *sampler,
1202 const float s[QUAD_SIZE],
1203 const float t[QUAD_SIZE],
1204 const float p[QUAD_SIZE],
1205 float lodbias,
1206 float rgba[NUM_CHANNELS][QUAD_SIZE])
1207 {
1208 static const unsigned faces[4] = {0, 0, 0, 0};
1209 static const float tzero[4] = {0, 0, 0, 0};
1210 lp_get_samples_2d_common(sampler, s, tzero, NULL,
1211 lodbias, rgba, faces);
1212 }
1213
1214
1215 static INLINE void
1216 lp_get_samples_2d(struct tgsi_sampler *sampler,
1217 const float s[QUAD_SIZE],
1218 const float t[QUAD_SIZE],
1219 const float p[QUAD_SIZE],
1220 float lodbias,
1221 float rgba[NUM_CHANNELS][QUAD_SIZE])
1222 {
1223 static const unsigned faces[4] = {0, 0, 0, 0};
1224 lp_get_samples_2d_common(sampler, s, t, p,
1225 lodbias, rgba, faces);
1226 }
1227
1228
1229 static INLINE void
1230 lp_get_samples_3d(struct tgsi_sampler *tgsi_sampler,
1231 const float s[QUAD_SIZE],
1232 const float t[QUAD_SIZE],
1233 const float p[QUAD_SIZE],
1234 float lodbias,
1235 float rgba[NUM_CHANNELS][QUAD_SIZE])
1236 {
1237 const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
1238 const struct pipe_texture *texture = samp->texture;
1239 const struct pipe_sampler_state *sampler = samp->sampler;
1240 /* get/map pipe_surfaces corresponding to 3D tex slices */
1241 unsigned level0, level1, j, imgFilter;
1242 int width, height, depth;
1243 float levelBlend;
1244 const uint face = 0;
1245
1246 choose_mipmap_levels(tgsi_sampler, s, t, p,
1247 lodbias,
1248 &level0, &level1, &levelBlend, &imgFilter);
1249
1250 assert(sampler->normalized_coords);
1251
1252 width = texture->width[level0];
1253 height = texture->height[level0];
1254 depth = texture->depth[level0];
1255
1256 assert(width > 0);
1257 assert(height > 0);
1258 assert(depth > 0);
1259
1260 switch (imgFilter) {
1261 case PIPE_TEX_FILTER_NEAREST:
1262 {
1263 int x[4], y[4], z[4];
1264 nearest_texcoord_4(sampler->wrap_s, s, width, x);
1265 nearest_texcoord_4(sampler->wrap_t, t, height, y);
1266 nearest_texcoord_4(sampler->wrap_r, p, depth, z);
1267 for (j = 0; j < QUAD_SIZE; j++) {
1268 get_texel(tgsi_sampler, face, level0, x[j], y[j], z[j], rgba, j);
1269 if (level0 != level1) {
1270 /* get texels from second mipmap level and blend */
1271 float rgba2[4][4];
1272 unsigned c;
1273 x[j] /= 2;
1274 y[j] /= 2;
1275 z[j] /= 2;
1276 get_texel(tgsi_sampler, face, level1, x[j], y[j], z[j], rgba2, j);
1277 for (c = 0; c < NUM_CHANNELS; c++) {
1278 rgba[c][j] = lerp(levelBlend, rgba2[c][j], rgba[c][j]);
1279 }
1280 }
1281 }
1282 }
1283 break;
1284 case PIPE_TEX_FILTER_LINEAR:
1285 case PIPE_TEX_FILTER_ANISO:
1286 {
1287 int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4];
1288 float xw[4], yw[4], zw[4]; /* interpolation weights */
1289 linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw);
1290 linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw);
1291 linear_texcoord_4(sampler->wrap_r, p, depth, z0, z1, zw);
1292
1293 for (j = 0; j < QUAD_SIZE; j++) {
1294 int c;
1295 float tx0[4][4], tx1[4][4];
1296 get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z0[j], tx0, 0);
1297 get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z0[j], tx0, 1);
1298 get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z0[j], tx0, 2);
1299 get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z0[j], tx0, 3);
1300 get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z1[j], tx1, 0);
1301 get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z1[j], tx1, 1);
1302 get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z1[j], tx1, 2);
1303 get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z1[j], tx1, 3);
1304
1305 /* interpolate R, G, B, A */
1306 for (c = 0; c < 4; c++) {
1307 rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j],
1308 tx0[c][0], tx0[c][1],
1309 tx0[c][2], tx0[c][3],
1310 tx1[c][0], tx1[c][1],
1311 tx1[c][2], tx1[c][3]);
1312 }
1313
1314 if (level0 != level1) {
1315 /* get texels from second mipmap level and blend */
1316 float rgba2[4][4];
1317 x0[j] /= 2;
1318 y0[j] /= 2;
1319 z0[j] /= 2;
1320 x1[j] /= 2;
1321 y1[j] /= 2;
1322 z1[j] /= 2;
1323 get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z0[j], tx0, 0);
1324 get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z0[j], tx0, 1);
1325 get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z0[j], tx0, 2);
1326 get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z0[j], tx0, 3);
1327 get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z1[j], tx1, 0);
1328 get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z1[j], tx1, 1);
1329 get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z1[j], tx1, 2);
1330 get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z1[j], tx1, 3);
1331
1332 /* interpolate R, G, B, A */
1333 for (c = 0; c < 4; c++) {
1334 rgba2[c][j] = lerp_3d(xw[j], yw[j], zw[j],
1335 tx0[c][0], tx0[c][1],
1336 tx0[c][2], tx0[c][3],
1337 tx1[c][0], tx1[c][1],
1338 tx1[c][2], tx1[c][3]);
1339 }
1340
1341 /* blend mipmap levels */
1342 for (c = 0; c < NUM_CHANNELS; c++) {
1343 rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]);
1344 }
1345 }
1346 }
1347 }
1348 break;
1349 default:
1350 assert(0);
1351 }
1352 }
1353
1354
1355 static void
1356 lp_get_samples_cube(struct tgsi_sampler *sampler,
1357 const float s[QUAD_SIZE],
1358 const float t[QUAD_SIZE],
1359 const float p[QUAD_SIZE],
1360 float lodbias,
1361 float rgba[NUM_CHANNELS][QUAD_SIZE])
1362 {
1363 unsigned faces[QUAD_SIZE], j;
1364 float ssss[4], tttt[4];
1365 for (j = 0; j < QUAD_SIZE; j++) {
1366 faces[j] = choose_cube_face(s[j], t[j], p[j], ssss + j, tttt + j);
1367 }
1368 lp_get_samples_2d_common(sampler, ssss, tttt, NULL,
1369 lodbias, rgba, faces);
1370 }
1371
1372
1373 static void
1374 lp_get_samples_rect(struct tgsi_sampler *tgsi_sampler,
1375 const float s[QUAD_SIZE],
1376 const float t[QUAD_SIZE],
1377 const float p[QUAD_SIZE],
1378 float lodbias,
1379 float rgba[NUM_CHANNELS][QUAD_SIZE])
1380 {
1381 const struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
1382 const struct pipe_texture *texture = samp->texture;
1383 const struct pipe_sampler_state *sampler = samp->sampler;
1384 const uint face = 0;
1385 unsigned level0, level1, j, imgFilter;
1386 int width, height;
1387 float levelBlend;
1388
1389 choose_mipmap_levels(tgsi_sampler, s, t, p,
1390 lodbias,
1391 &level0, &level1, &levelBlend, &imgFilter);
1392
1393 /* texture RECTS cannot be mipmapped */
1394 assert(level0 == level1);
1395
1396 width = texture->width[level0];
1397 height = texture->height[level0];
1398
1399 assert(width > 0);
1400
1401 switch (imgFilter) {
1402 case PIPE_TEX_FILTER_NEAREST:
1403 {
1404 int x[4], y[4];
1405 nearest_texcoord_unnorm_4(sampler->wrap_s, s, width, x);
1406 nearest_texcoord_unnorm_4(sampler->wrap_t, t, height, y);
1407 for (j = 0; j < QUAD_SIZE; j++) {
1408 get_texel(tgsi_sampler, face, level0, x[j], y[j], 0, rgba, j);
1409 if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
1410 shadow_compare(sampler, rgba, p, j);
1411 }
1412 }
1413 }
1414 break;
1415 case PIPE_TEX_FILTER_LINEAR:
1416 case PIPE_TEX_FILTER_ANISO:
1417 {
1418 int x0[4], y0[4], x1[4], y1[4];
1419 float xw[4], yw[4]; /* weights */
1420 linear_texcoord_unnorm_4(sampler->wrap_s, s, width, x0, x1, xw);
1421 linear_texcoord_unnorm_4(sampler->wrap_t, t, height, y0, y1, yw);
1422 for (j = 0; j < QUAD_SIZE; j++) {
1423 float tx[4][4]; /* texels */
1424 int c;
1425 get_texel(tgsi_sampler, face, level0, x0[j], y0[j], 0, tx, 0);
1426 get_texel(tgsi_sampler, face, level0, x1[j], y0[j], 0, tx, 1);
1427 get_texel(tgsi_sampler, face, level0, x0[j], y1[j], 0, tx, 2);
1428 get_texel(tgsi_sampler, face, level0, x1[j], y1[j], 0, tx, 3);
1429 if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
1430 shadow_compare4(sampler, tx, p);
1431 }
1432 for (c = 0; c < 4; c++) {
1433 rgba[c][j] = lerp_2d(xw[j], yw[j],
1434 tx[c][0], tx[c][1], tx[c][2], tx[c][3]);
1435 }
1436 }
1437 }
1438 break;
1439 default:
1440 assert(0);
1441 }
1442 }
1443
1444
1445 /**
1446 * Error condition handler
1447 */
1448 static INLINE void
1449 lp_get_samples_null(struct tgsi_sampler *tgsi_sampler,
1450 const float s[QUAD_SIZE],
1451 const float t[QUAD_SIZE],
1452 const float p[QUAD_SIZE],
1453 float lodbias,
1454 float rgba[NUM_CHANNELS][QUAD_SIZE])
1455 {
1456 int i,j;
1457
1458 for (i = 0; i < 4; i++)
1459 for (j = 0; j < 4; j++)
1460 rgba[i][j] = 1.0;
1461 }
1462
1463 /**
1464 * Called via tgsi_sampler::get_samples() when using a sampler for the
1465 * first time. Determine the actual sampler function, link it in and
1466 * call it.
1467 */
1468 void
1469 lp_get_samples(struct tgsi_sampler *tgsi_sampler,
1470 const float s[QUAD_SIZE],
1471 const float t[QUAD_SIZE],
1472 const float p[QUAD_SIZE],
1473 float lodbias,
1474 float rgba[NUM_CHANNELS][QUAD_SIZE])
1475 {
1476 struct lp_shader_sampler *samp = lp_shader_sampler(tgsi_sampler);
1477 const struct pipe_texture *texture = samp->texture;
1478 const struct pipe_sampler_state *sampler = samp->sampler;
1479
1480 /* Default to the 'undefined' case:
1481 */
1482 tgsi_sampler->get_samples = lp_get_samples_null;
1483
1484 if (!texture) {
1485 assert(0); /* is this legal?? */
1486 goto out;
1487 }
1488
1489 if (!sampler->normalized_coords) {
1490 assert (texture->target == PIPE_TEXTURE_2D);
1491 tgsi_sampler->get_samples = lp_get_samples_rect;
1492 goto out;
1493 }
1494
1495 switch (texture->target) {
1496 case PIPE_TEXTURE_1D:
1497 tgsi_sampler->get_samples = lp_get_samples_1d;
1498 break;
1499 case PIPE_TEXTURE_2D:
1500 tgsi_sampler->get_samples = lp_get_samples_2d;
1501 break;
1502 case PIPE_TEXTURE_3D:
1503 tgsi_sampler->get_samples = lp_get_samples_3d;
1504 break;
1505 case PIPE_TEXTURE_CUBE:
1506 tgsi_sampler->get_samples = lp_get_samples_cube;
1507 break;
1508 default:
1509 assert(0);
1510 break;
1511 }
1512
1513 /* Do this elsewhere:
1514 */
1515 samp->xpot = util_unsigned_logbase2( samp->texture->width[0] );
1516 samp->ypot = util_unsigned_logbase2( samp->texture->height[0] );
1517
1518 /* Try to hook in a faster sampler. Ultimately we'll have to
1519 * code-generate these. Luckily most of this looks like it is
1520 * orthogonal state within the sampler.
1521 */
1522 if (texture->target == PIPE_TEXTURE_2D &&
1523 sampler->min_img_filter == sampler->mag_img_filter &&
1524 sampler->wrap_s == sampler->wrap_t &&
1525 sampler->compare_mode == FALSE &&
1526 sampler->normalized_coords)
1527 {
1528 if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) {
1529 samp->level = CLAMP((int) sampler->min_lod,
1530 0, (int) texture->last_level);
1531
1532 if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) {
1533 switch (sampler->min_img_filter) {
1534 case PIPE_TEX_FILTER_NEAREST:
1535 tgsi_sampler->get_samples = lp_get_samples_2d_nearest_repeat_POT;
1536 break;
1537 case PIPE_TEX_FILTER_LINEAR:
1538 tgsi_sampler->get_samples = lp_get_samples_2d_linear_repeat_POT;
1539 break;
1540 default:
1541 break;
1542 }
1543 }
1544 else if (sampler->wrap_s == PIPE_TEX_WRAP_CLAMP) {
1545 switch (sampler->min_img_filter) {
1546 case PIPE_TEX_FILTER_NEAREST:
1547 tgsi_sampler->get_samples = lp_get_samples_2d_nearest_clamp_POT;
1548 break;
1549 default:
1550 break;
1551 }
1552 }
1553 }
1554 else if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1555 if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) {
1556 switch (sampler->min_img_filter) {
1557 case PIPE_TEX_FILTER_LINEAR:
1558 tgsi_sampler->get_samples = lp_get_samples_2d_linear_mip_linear_repeat_POT;
1559 break;
1560 default:
1561 break;
1562 }
1563 }
1564 }
1565 }
1566 else if (0) {
1567 _debug_printf("target %d/%d min_mip %d/%d min_img %d/%d wrap %d/%d compare %d/%d norm %d/%d\n",
1568 texture->target, PIPE_TEXTURE_2D,
1569 sampler->min_mip_filter, PIPE_TEX_MIPFILTER_NONE,
1570 sampler->min_img_filter, sampler->mag_img_filter,
1571 sampler->wrap_s, sampler->wrap_t,
1572 sampler->compare_mode, FALSE,
1573 sampler->normalized_coords, TRUE);
1574 }
1575
1576 out:
1577 tgsi_sampler->get_samples( tgsi_sampler, s, t, p, lodbias, rgba );
1578 }
1579
1580
1581 void PIPE_CDECL
1582 lp_fetch_texel_soa( struct tgsi_sampler **samplers,
1583 uint32_t unit,
1584 float *store )
1585 {
1586 struct tgsi_sampler *sampler = samplers[unit];
1587
1588 #if 0
1589 uint j;
1590
1591 debug_printf("%s sampler: %p (%p) store: %p\n",
1592 __FUNCTION__,
1593 sampler, *sampler,
1594 store );
1595
1596 debug_printf("lodbias %f\n", store[12]);
1597
1598 for (j = 0; j < 4; j++)
1599 debug_printf("sample %d texcoord %f %f\n",
1600 j,
1601 store[0+j],
1602 store[4+j]);
1603 #endif
1604
1605 {
1606 float rgba[NUM_CHANNELS][QUAD_SIZE];
1607 sampler->get_samples(sampler,
1608 &store[0],
1609 &store[4],
1610 &store[8],
1611 0.0f, /*store[12], lodbias */
1612 rgba);
1613 memcpy(store, rgba, sizeof rgba);
1614 }
1615
1616 #if 0
1617 for (j = 0; j < 4; j++)
1618 debug_printf("sample %d result %f %f %f %f\n",
1619 j,
1620 store[0+j],
1621 store[4+j],
1622 store[8+j],
1623 store[12+j]);
1624 #endif
1625 }
1626
1627
1628 #include "lp_bld_type.h"
1629 #include "lp_bld_intr.h"
1630 #include "lp_bld_tgsi.h"
1631
1632
1633 struct lp_c_sampler_soa
1634 {
1635 struct lp_build_sampler_soa base;
1636
1637 LLVMValueRef context_ptr;
1638
1639 LLVMValueRef samplers_ptr;
1640
1641 /** Coords/texels store */
1642 LLVMValueRef store_ptr;
1643 };
1644
1645
/**
 * lp_build_sampler_soa::destroy callback.
 *
 * Frees the object allocated by lp_c_sampler_soa_create().  The base
 * struct is the first member, so both pointers refer to the same
 * allocation.
 */
static void
lp_c_sampler_soa_destroy(struct lp_build_sampler_soa *sampler)
{
   struct lp_c_sampler_soa *self = (struct lp_c_sampler_soa *)sampler;

   FREE(self);
}
1651
1652
1653 static void
1654 lp_c_sampler_soa_emit_fetch_texel(struct lp_build_sampler_soa *_sampler,
1655 LLVMBuilderRef builder,
1656 struct lp_type type,
1657 unsigned unit,
1658 unsigned num_coords,
1659 const LLVMValueRef *coords,
1660 LLVMValueRef lodbias,
1661 LLVMValueRef *texel)
1662 {
1663 struct lp_c_sampler_soa *sampler = (struct lp_c_sampler_soa *)_sampler;
1664 LLVMTypeRef vec_type = LLVMTypeOf(coords[0]);
1665 LLVMValueRef args[3];
1666 unsigned i;
1667
1668 if(!sampler->samplers_ptr)
1669 sampler->samplers_ptr = lp_jit_context_samplers(builder, sampler->context_ptr);
1670
1671 if(!sampler->store_ptr)
1672 sampler->store_ptr = LLVMBuildArrayAlloca(builder,
1673 vec_type,
1674 LLVMConstInt(LLVMInt32Type(), 4, 0),
1675 "texel_store");
1676
1677 for (i = 0; i < num_coords; i++) {
1678 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
1679 LLVMValueRef coord_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, "");
1680 LLVMBuildStore(builder, coords[i], coord_ptr);
1681 }
1682
1683 args[0] = sampler->samplers_ptr;
1684 args[1] = LLVMConstInt(LLVMInt32Type(), unit, 0);
1685 args[2] = sampler->store_ptr;
1686
1687 lp_build_intrinsic(builder, "fetch_texel", LLVMVoidType(), args, 3);
1688
1689 for (i = 0; i < NUM_CHANNELS; ++i) {
1690 LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
1691 LLVMValueRef texel_ptr = LLVMBuildGEP(builder, sampler->store_ptr, &index, 1, "");
1692 texel[i] = LLVMBuildLoad(builder, texel_ptr, "");
1693 }
1694 }
1695
1696
1697 struct lp_build_sampler_soa *
1698 lp_c_sampler_soa_create(LLVMValueRef context_ptr)
1699 {
1700 struct lp_c_sampler_soa *sampler;
1701
1702 sampler = CALLOC_STRUCT(lp_c_sampler_soa);
1703 if(!sampler)
1704 return NULL;
1705
1706 sampler->base.destroy = lp_c_sampler_soa_destroy;
1707 sampler->base.emit_fetch_texel = lp_c_sampler_soa_emit_fetch_texel;
1708 sampler->context_ptr = context_ptr;
1709
1710 return &sampler->base;
1711 }
1712