Merge branch 'mesa_7_5_branch' into mesa_7_6_branch
[mesa.git] / src / gallium / drivers / cell / spu / spu_texture.c
1 /**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include <math.h>
30
31 #include "pipe/p_compiler.h"
32 #include "spu_main.h"
33 #include "spu_texture.h"
34 #include "spu_tile.h"
35 #include "spu_colorpack.h"
36 #include "spu_dcache.h"
37
38
39 /**
40 * Mark all tex cache entries as invalid.
41 */
42 void
43 invalidate_tex_cache(void)
44 {
45 uint lvl;
46 for (lvl = 0; lvl < CELL_MAX_TEXTURE_LEVELS; lvl++) {
47 uint unit = 0;
48 uint bytes = 4 * spu.texture[unit].level[lvl].width
49 * spu.texture[unit].level[lvl].height;
50
51 if (spu.texture[unit].target == PIPE_TEXTURE_CUBE)
52 bytes *= 6;
53 else if (spu.texture[unit].target == PIPE_TEXTURE_3D)
54 bytes *= spu.texture[unit].level[lvl].depth;
55
56 spu_dcache_mark_dirty((unsigned) spu.texture[unit].level[lvl].start, bytes);
57 }
58 }
59
60
61 /**
62 * Get four texels from locations (x[0], y[0]), (x[1], y[1]) ...
63 *
64 * NOTE: in the typical case of bilinear filtering, the four texels
65 * are in a 2x2 group so we could get by with just two dcache fetches
66 * (two side-by-side texels per fetch). But when bilinear filtering
67 * wraps around a texture edge, we'll probably need code like we have
68 * now.
69 * FURTHERMORE: since we're rasterizing a quad of 2x2 pixels at a time,
70 * it's quite likely that the four pixels in a quad will need some of the
71 * same texels. So look into doing texture fetches for four pixels at
72 * a time.
73 */
74 static void
75 get_four_texels(const struct spu_texture_level *tlevel, uint face,
76 vec_int4 x, vec_int4 y,
77 vec_uint4 *texels)
78 {
79 unsigned texture_ea = (uintptr_t) tlevel->start;
80 const vec_int4 tile_x = spu_rlmask(x, -5); /* tile_x = x / 32 */
81 const vec_int4 tile_y = spu_rlmask(y, -5); /* tile_y = y / 32 */
82 const qword offset_x = si_andi((qword) x, 0x1f); /* offset_x = x & 0x1f */
83 const qword offset_y = si_andi((qword) y, 0x1f); /* offset_y = y & 0x1f */
84
85 const qword tiles_per_row = (qword) spu_splats(tlevel->tiles_per_row);
86 const qword tile_size = (qword) spu_splats((unsigned) sizeof(tile_t));
87
88 qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x);
89 tile_offset = si_mpy((qword) tile_offset, tile_size);
90
91 qword texel_offset = si_a(si_mpyui(offset_y, 32), offset_x);
92 texel_offset = si_mpyui(texel_offset, 4);
93
94 vec_uint4 offset = (vec_uint4) si_a(tile_offset, texel_offset);
95
96 texture_ea = texture_ea + face * tlevel->bytes_per_image;
97
98 spu_dcache_fetch_unaligned((qword *) & texels[0],
99 texture_ea + spu_extract(offset, 0), 4);
100 spu_dcache_fetch_unaligned((qword *) & texels[1],
101 texture_ea + spu_extract(offset, 1), 4);
102 spu_dcache_fetch_unaligned((qword *) & texels[2],
103 texture_ea + spu_extract(offset, 2), 4);
104 spu_dcache_fetch_unaligned((qword *) & texels[3],
105 texture_ea + spu_extract(offset, 3), 4);
106 }
107
108
109 /** clamp vec to [0, max] */
110 static INLINE vector signed int
111 spu_clamp(vector signed int vec, vector signed int max)
112 {
113 static const vector signed int zero = {0,0,0,0};
114 vector unsigned int c;
115 c = spu_cmpgt(vec, zero); /* c = vec > zero ? ~0 : 0 */
116 vec = spu_sel(zero, vec, c);
117 c = spu_cmpgt(vec, max); /* c = vec > max ? ~0 : 0 */
118 vec = spu_sel(vec, max, c);
119 return vec;
120 }
121
122
123
124 /**
125 * Do nearest texture sampling for four pixels.
126 * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa).
127 */
128 void
129 sample_texture_2d_nearest(vector float s, vector float t,
130 uint unit, uint level, uint face,
131 vector float colors[4])
132 {
133 const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
134 vector float ss = spu_mul(s, tlevel->scale_s);
135 vector float tt = spu_mul(t, tlevel->scale_t);
136 vector signed int is = spu_convts(ss, 0);
137 vector signed int it = spu_convts(tt, 0);
138 vec_uint4 texels[4];
139
140 /* PIPE_TEX_WRAP_REPEAT */
141 is = spu_and(is, tlevel->mask_s);
142 it = spu_and(it, tlevel->mask_t);
143
144 /* PIPE_TEX_WRAP_CLAMP */
145 is = spu_clamp(is, tlevel->max_s);
146 it = spu_clamp(it, tlevel->max_t);
147
148 get_four_texels(tlevel, face, is, it, texels);
149
150 /* convert four packed ARGBA pixels to float RRRR,GGGG,BBBB,AAAA */
151 spu_unpack_A8R8G8B8_transpose4(texels, colors);
152 }
153
154
155 /**
156 * Do bilinear texture sampling for four pixels.
157 * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa).
158 */
159 void
160 sample_texture_2d_bilinear(vector float s, vector float t,
161 uint unit, uint level, uint face,
162 vector float colors[4])
163 {
164 const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
165 static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f};
166
167 vector float ss = spu_madd(s, tlevel->scale_s, half);
168 vector float tt = spu_madd(t, tlevel->scale_t, half);
169
170 vector signed int is0 = spu_convts(ss, 0);
171 vector signed int it0 = spu_convts(tt, 0);
172
173 /* is + 1, it + 1 */
174 vector signed int is1 = spu_add(is0, 1);
175 vector signed int it1 = spu_add(it0, 1);
176
177 /* PIPE_TEX_WRAP_REPEAT */
178 is0 = spu_and(is0, tlevel->mask_s);
179 it0 = spu_and(it0, tlevel->mask_t);
180 is1 = spu_and(is1, tlevel->mask_s);
181 it1 = spu_and(it1, tlevel->mask_t);
182
183 /* PIPE_TEX_WRAP_CLAMP */
184 is0 = spu_clamp(is0, tlevel->max_s);
185 it0 = spu_clamp(it0, tlevel->max_t);
186 is1 = spu_clamp(is1, tlevel->max_s);
187 it1 = spu_clamp(it1, tlevel->max_t);
188
189 /* get packed int texels */
190 vector unsigned int texels[16];
191 get_four_texels(tlevel, face, is0, it0, texels + 0); /* upper-left */
192 get_four_texels(tlevel, face, is1, it0, texels + 4); /* upper-right */
193 get_four_texels(tlevel, face, is0, it1, texels + 8); /* lower-left */
194 get_four_texels(tlevel, face, is1, it1, texels + 12); /* lower-right */
195
196 /* convert packed int texels to float colors */
197 vector float ftexels[16];
198 spu_unpack_A8R8G8B8_transpose4(texels + 0, ftexels + 0);
199 spu_unpack_A8R8G8B8_transpose4(texels + 4, ftexels + 4);
200 spu_unpack_A8R8G8B8_transpose4(texels + 8, ftexels + 8);
201 spu_unpack_A8R8G8B8_transpose4(texels + 12, ftexels + 12);
202
203 /* Compute weighting factors in [0,1]
204 * Multiply texcoord by 1024, AND with 1023, convert back to float.
205 */
206 vector float ss1024 = spu_mul(ss, spu_splats(1024.0f));
207 vector signed int iss1024 = spu_convts(ss1024, 0);
208 iss1024 = spu_and(iss1024, 1023);
209 vector float sWeights0 = spu_convtf(iss1024, 10);
210
211 vector float tt1024 = spu_mul(tt, spu_splats(1024.0f));
212 vector signed int itt1024 = spu_convts(tt1024, 0);
213 itt1024 = spu_and(itt1024, 1023);
214 vector float tWeights0 = spu_convtf(itt1024, 10);
215
216 /* 1 - sWeight and 1 - tWeight */
217 vector float sWeights1 = spu_sub(spu_splats(1.0f), sWeights0);
218 vector float tWeights1 = spu_sub(spu_splats(1.0f), tWeights0);
219
220 /* reds, for four pixels */
221 ftexels[ 0] = spu_mul(ftexels[ 0], spu_mul(sWeights1, tWeights1)); /*ul*/
222 ftexels[ 4] = spu_mul(ftexels[ 4], spu_mul(sWeights0, tWeights1)); /*ur*/
223 ftexels[ 8] = spu_mul(ftexels[ 8], spu_mul(sWeights1, tWeights0)); /*ll*/
224 ftexels[12] = spu_mul(ftexels[12], spu_mul(sWeights0, tWeights0)); /*lr*/
225 colors[0] = spu_add(spu_add(ftexels[0], ftexels[4]),
226 spu_add(ftexels[8], ftexels[12]));
227
228 /* greens, for four pixels */
229 ftexels[ 1] = spu_mul(ftexels[ 1], spu_mul(sWeights1, tWeights1)); /*ul*/
230 ftexels[ 5] = spu_mul(ftexels[ 5], spu_mul(sWeights0, tWeights1)); /*ur*/
231 ftexels[ 9] = spu_mul(ftexels[ 9], spu_mul(sWeights1, tWeights0)); /*ll*/
232 ftexels[13] = spu_mul(ftexels[13], spu_mul(sWeights0, tWeights0)); /*lr*/
233 colors[1] = spu_add(spu_add(ftexels[1], ftexels[5]),
234 spu_add(ftexels[9], ftexels[13]));
235
236 /* blues, for four pixels */
237 ftexels[ 2] = spu_mul(ftexels[ 2], spu_mul(sWeights1, tWeights1)); /*ul*/
238 ftexels[ 6] = spu_mul(ftexels[ 6], spu_mul(sWeights0, tWeights1)); /*ur*/
239 ftexels[10] = spu_mul(ftexels[10], spu_mul(sWeights1, tWeights0)); /*ll*/
240 ftexels[14] = spu_mul(ftexels[14], spu_mul(sWeights0, tWeights0)); /*lr*/
241 colors[2] = spu_add(spu_add(ftexels[2], ftexels[6]),
242 spu_add(ftexels[10], ftexels[14]));
243
244 /* alphas, for four pixels */
245 ftexels[ 3] = spu_mul(ftexels[ 3], spu_mul(sWeights1, tWeights1)); /*ul*/
246 ftexels[ 7] = spu_mul(ftexels[ 7], spu_mul(sWeights0, tWeights1)); /*ur*/
247 ftexels[11] = spu_mul(ftexels[11], spu_mul(sWeights1, tWeights0)); /*ll*/
248 ftexels[15] = spu_mul(ftexels[15], spu_mul(sWeights0, tWeights0)); /*lr*/
249 colors[3] = spu_add(spu_add(ftexels[3], ftexels[7]),
250 spu_add(ftexels[11], ftexels[15]));
251 }
252
253
254
255 /**
256 * Adapted from /opt/cell/sdk/usr/spu/include/transpose_matrix4x4.h
257 */
258 static INLINE void
259 transpose(vector unsigned int *mOut0,
260 vector unsigned int *mOut1,
261 vector unsigned int *mOut2,
262 vector unsigned int *mOut3,
263 vector unsigned int *mIn)
264 {
265 vector unsigned int abcd, efgh, ijkl, mnop; /* input vectors */
266 vector unsigned int aeim, bfjn, cgko, dhlp; /* output vectors */
267 vector unsigned int aibj, ckdl, emfn, gohp; /* intermediate vectors */
268
269 vector unsigned char shufflehi = ((vector unsigned char) {
270 0x00, 0x01, 0x02, 0x03,
271 0x10, 0x11, 0x12, 0x13,
272 0x04, 0x05, 0x06, 0x07,
273 0x14, 0x15, 0x16, 0x17});
274 vector unsigned char shufflelo = ((vector unsigned char) {
275 0x08, 0x09, 0x0A, 0x0B,
276 0x18, 0x19, 0x1A, 0x1B,
277 0x0C, 0x0D, 0x0E, 0x0F,
278 0x1C, 0x1D, 0x1E, 0x1F});
279 abcd = *(mIn+0);
280 efgh = *(mIn+1);
281 ijkl = *(mIn+2);
282 mnop = *(mIn+3);
283
284 aibj = spu_shuffle(abcd, ijkl, shufflehi);
285 ckdl = spu_shuffle(abcd, ijkl, shufflelo);
286 emfn = spu_shuffle(efgh, mnop, shufflehi);
287 gohp = spu_shuffle(efgh, mnop, shufflelo);
288
289 aeim = spu_shuffle(aibj, emfn, shufflehi);
290 bfjn = spu_shuffle(aibj, emfn, shufflelo);
291 cgko = spu_shuffle(ckdl, gohp, shufflehi);
292 dhlp = spu_shuffle(ckdl, gohp, shufflelo);
293
294 *mOut0 = aeim;
295 *mOut1 = bfjn;
296 *mOut2 = cgko;
297 *mOut3 = dhlp;
298 }
299
300
301 /**
302 * Bilinear filtering, using int instead of float arithmetic for computing
303 * sample weights.
304 */
305 void
306 sample_texture_2d_bilinear_int(vector float s, vector float t,
307 uint unit, uint level, uint face,
308 vector float colors[4])
309 {
310 const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
311 static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f};
312
313 /* Scale texcoords by size of texture, and add half pixel bias */
314 vector float ss = spu_madd(s, tlevel->scale_s, half);
315 vector float tt = spu_madd(t, tlevel->scale_t, half);
316
317 /* convert float coords to fixed-pt coords with 7 fraction bits */
318 vector signed int is = spu_convts(ss, 7); /* XXX really need floor() here */
319 vector signed int it = spu_convts(tt, 7); /* XXX really need floor() here */
320
321 /* compute integer texel weights in [0, 127] */
322 vector signed int sWeights0 = spu_and(is, 127);
323 vector signed int tWeights0 = spu_and(it, 127);
324 vector signed int sWeights1 = spu_sub(127, sWeights0);
325 vector signed int tWeights1 = spu_sub(127, tWeights0);
326
327 /* texel coords: is0 = is / 128, it0 = is / 128 */
328 vector signed int is0 = spu_rlmask(is, -7);
329 vector signed int it0 = spu_rlmask(it, -7);
330
331 /* texel coords: i1 = is0 + 1, it1 = it0 + 1 */
332 vector signed int is1 = spu_add(is0, 1);
333 vector signed int it1 = spu_add(it0, 1);
334
335 /* PIPE_TEX_WRAP_REPEAT */
336 is0 = spu_and(is0, tlevel->mask_s);
337 it0 = spu_and(it0, tlevel->mask_t);
338 is1 = spu_and(is1, tlevel->mask_s);
339 it1 = spu_and(it1, tlevel->mask_t);
340
341 /* PIPE_TEX_WRAP_CLAMP */
342 is0 = spu_clamp(is0, tlevel->max_s);
343 it0 = spu_clamp(it0, tlevel->max_t);
344 is1 = spu_clamp(is1, tlevel->max_s);
345 it1 = spu_clamp(it1, tlevel->max_t);
346
347 /* get packed int texels */
348 vector unsigned int texels[16];
349 get_four_texels(tlevel, face, is0, it0, texels + 0); /* upper-left */
350 get_four_texels(tlevel, face, is1, it0, texels + 4); /* upper-right */
351 get_four_texels(tlevel, face, is0, it1, texels + 8); /* lower-left */
352 get_four_texels(tlevel, face, is1, it1, texels + 12); /* lower-right */
353
354 /* twiddle packed 32-bit BGRA pixels into RGBA as four unsigned ints */
355 {
356 static const unsigned char ZERO = 0x80;
357 int i;
358 for (i = 0; i < 16; i++) {
359 texels[i] = spu_shuffle(texels[i], texels[i],
360 ((vector unsigned char) {
361 ZERO, ZERO, ZERO, 1,
362 ZERO, ZERO, ZERO, 2,
363 ZERO, ZERO, ZERO, 3,
364 ZERO, ZERO, ZERO, 0}));
365 }
366 }
367
368 /* convert RGBA,RGBA,RGBA,RGBA to RRRR,GGGG,BBBB,AAAA */
369 vector unsigned int texel0, texel1, texel2, texel3, texel4, texel5, texel6, texel7,
370 texel8, texel9, texel10, texel11, texel12, texel13, texel14, texel15;
371 transpose(&texel0, &texel1, &texel2, &texel3, texels + 0);
372 transpose(&texel4, &texel5, &texel6, &texel7, texels + 4);
373 transpose(&texel8, &texel9, &texel10, &texel11, texels + 8);
374 transpose(&texel12, &texel13, &texel14, &texel15, texels + 12);
375
376 /* computed weighted colors */
377 vector unsigned int c0, c1, c2, c3, cSum;
378
379 /* red */
380 c0 = (vector unsigned int) si_mpy((qword) texel0, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
381 c1 = (vector unsigned int) si_mpy((qword) texel4, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
382 c2 = (vector unsigned int) si_mpy((qword) texel8, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
383 c3 = (vector unsigned int) si_mpy((qword) texel12, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
384 cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
385 colors[0] = spu_convtf(cSum, 22);
386
387 /* green */
388 c0 = (vector unsigned int) si_mpy((qword) texel1, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
389 c1 = (vector unsigned int) si_mpy((qword) texel5, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
390 c2 = (vector unsigned int) si_mpy((qword) texel9, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
391 c3 = (vector unsigned int) si_mpy((qword) texel13, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
392 cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
393 colors[1] = spu_convtf(cSum, 22);
394
395 /* blue */
396 c0 = (vector unsigned int) si_mpy((qword) texel2, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
397 c1 = (vector unsigned int) si_mpy((qword) texel6, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
398 c2 = (vector unsigned int) si_mpy((qword) texel10, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
399 c3 = (vector unsigned int) si_mpy((qword) texel14, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
400 cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
401 colors[2] = spu_convtf(cSum, 22);
402
403 /* alpha */
404 c0 = (vector unsigned int) si_mpy((qword) texel3, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/
405 c1 = (vector unsigned int) si_mpy((qword) texel7, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/
406 c2 = (vector unsigned int) si_mpy((qword) texel11, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/
407 c3 = (vector unsigned int) si_mpy((qword) texel15, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/
408 cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
409 colors[3] = spu_convtf(cSum, 22);
410 }
411
412
413
414 /**
415 * Compute level of detail factor from texcoords.
416 */
417 static INLINE float
418 compute_lambda_2d(uint unit, vector float s, vector float t)
419 {
420 uint baseLevel = 0;
421 float width = spu.texture[unit].level[baseLevel].width;
422 float height = spu.texture[unit].level[baseLevel].width;
423 float dsdx = width * (spu_extract(s, 1) - spu_extract(s, 0));
424 float dsdy = width * (spu_extract(s, 2) - spu_extract(s, 0));
425 float dtdx = height * (spu_extract(t, 1) - spu_extract(t, 0));
426 float dtdy = height * (spu_extract(t, 2) - spu_extract(t, 0));
427 #if 0
428 /* ideal value */
429 float x = dsdx * dsdx + dtdx * dtdx;
430 float y = dsdy * dsdy + dtdy * dtdy;
431 float rho = x > y ? x : y;
432 rho = sqrtf(rho);
433 #else
434 /* approximation */
435 dsdx = fabsf(dsdx);
436 dsdy = fabsf(dsdy);
437 dtdx = fabsf(dtdx);
438 dtdy = fabsf(dtdy);
439 float rho = (dsdx + dsdy + dtdx + dtdy) * 0.5;
440 #endif
441 float lambda = logf(rho) * 1.442695f; /* compute logbase2(rho) */
442 return lambda;
443 }
444
445
446 /**
447 * Blend two sets of colors according to weight.
448 */
449 static void
450 blend_colors(vector float c0[4], const vector float c1[4], float weight)
451 {
452 vector float t = spu_splats(weight);
453 vector float dc0 = spu_sub(c1[0], c0[0]);
454 vector float dc1 = spu_sub(c1[1], c0[1]);
455 vector float dc2 = spu_sub(c1[2], c0[2]);
456 vector float dc3 = spu_sub(c1[3], c0[3]);
457 c0[0] = spu_madd(dc0, t, c0[0]);
458 c0[1] = spu_madd(dc1, t, c0[1]);
459 c0[2] = spu_madd(dc2, t, c0[2]);
460 c0[3] = spu_madd(dc3, t, c0[3]);
461 }
462
463
464 /**
465 * Texture sampling with level of detail selection and possibly mipmap
466 * interpolation.
467 */
468 void
469 sample_texture_2d_lod(vector float s, vector float t,
470 uint unit, uint level_ignored, uint face,
471 vector float colors[4])
472 {
473 /*
474 * Note that we're computing a lambda/lod here that's used for all
475 * four pixels in the quad.
476 */
477 float lambda = compute_lambda_2d(unit, s, t);
478
479 (void) face;
480 (void) level_ignored;
481
482 /* apply lod bias */
483 lambda += spu.sampler[unit].lod_bias;
484
485 /* clamp */
486 if (lambda < spu.sampler[unit].min_lod)
487 lambda = spu.sampler[unit].min_lod;
488 else if (lambda > spu.sampler[unit].max_lod)
489 lambda = spu.sampler[unit].max_lod;
490
491 if (lambda <= 0.0f) {
492 /* magnify */
493 spu.mag_sample_texture_2d[unit](s, t, unit, 0, face, colors);
494 }
495 else {
496 /* minify */
497 if (spu.sampler[unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) {
498 /* sample two mipmap levels and interpolate */
499 int level = (int) lambda;
500 if (level > (int) spu.texture[unit].max_level)
501 level = spu.texture[unit].max_level;
502 spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors);
503 if (spu.sampler[unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) {
504 /* sample second mipmap level */
505 float weight = lambda - (float) level;
506 level++;
507 if (level <= (int) spu.texture[unit].max_level) {
508 vector float colors2[4];
509 spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors2);
510 blend_colors(colors, colors2, weight);
511 }
512 }
513 }
514 else {
515 /* sample one mipmap level */
516 int level = (int) (lambda + 0.5f);
517 if (level > (int) spu.texture[unit].max_level)
518 level = spu.texture[unit].max_level;
519 spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors);
520 }
521 }
522 }
523
524
525 /** XXX need a SIMD version of this */
526 static unsigned
527 choose_cube_face(float rx, float ry, float rz, float *newS, float *newT)
528 {
529 /*
530 major axis
531 direction target sc tc ma
532 ---------- ------------------------------- --- --- ---
533 +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx
534 -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx
535 +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry
536 -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry
537 +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz
538 -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz
539 */
540 const float arx = fabsf(rx);
541 const float ary = fabsf(ry);
542 const float arz = fabsf(rz);
543 unsigned face;
544 float sc, tc, ma;
545
546 if (arx > ary && arx > arz) {
547 if (rx >= 0.0F) {
548 face = PIPE_TEX_FACE_POS_X;
549 sc = -rz;
550 tc = -ry;
551 ma = arx;
552 }
553 else {
554 face = PIPE_TEX_FACE_NEG_X;
555 sc = rz;
556 tc = -ry;
557 ma = arx;
558 }
559 }
560 else if (ary > arx && ary > arz) {
561 if (ry >= 0.0F) {
562 face = PIPE_TEX_FACE_POS_Y;
563 sc = rx;
564 tc = rz;
565 ma = ary;
566 }
567 else {
568 face = PIPE_TEX_FACE_NEG_Y;
569 sc = rx;
570 tc = -rz;
571 ma = ary;
572 }
573 }
574 else {
575 if (rz > 0.0F) {
576 face = PIPE_TEX_FACE_POS_Z;
577 sc = rx;
578 tc = -ry;
579 ma = arz;
580 }
581 else {
582 face = PIPE_TEX_FACE_NEG_Z;
583 sc = -rx;
584 tc = -ry;
585 ma = arz;
586 }
587 }
588
589 *newS = (sc / ma + 1.0F) * 0.5F;
590 *newT = (tc / ma + 1.0F) * 0.5F;
591
592 return face;
593 }
594
595
596
597 void
598 sample_texture_cube(vector float s, vector float t, vector float r,
599 uint unit, vector float colors[4])
600 {
601 uint p, faces[4], level = 0;
602 float newS[4], newT[4];
603
604 /* Compute cube faces referenced by the four sets of texcoords.
605 * XXX we should SIMD-ize this.
606 */
607 for (p = 0; p < 4; p++) {
608 float rx = spu_extract(s, p);
609 float ry = spu_extract(t, p);
610 float rz = spu_extract(r, p);
611 faces[p] = choose_cube_face(rx, ry, rz, &newS[p], &newT[p]);
612 }
613
614 if (faces[0] == faces[1] &&
615 faces[0] == faces[2] &&
616 faces[0] == faces[3]) {
617 /* GOOD! All four texcoords refer to the same cube face */
618 s = (vector float) {newS[0], newS[1], newS[2], newS[3]};
619 t = (vector float) {newT[0], newT[1], newT[2], newT[3]};
620 spu.sample_texture_2d[unit](s, t, unit, level, faces[0], colors);
621 }
622 else {
623 /* BAD! The four texcoords refer to different faces */
624 for (p = 0; p < 4; p++) {
625 vector float c[4];
626
627 spu.sample_texture_2d[unit](spu_splats(newS[p]), spu_splats(newT[p]),
628 unit, level, faces[p], c);
629
630 float red = spu_extract(c[0], p);
631 float green = spu_extract(c[1], p);
632 float blue = spu_extract(c[2], p);
633 float alpha = spu_extract(c[3], p);
634
635 colors[0] = spu_insert(red, colors[0], p);
636 colors[1] = spu_insert(green, colors[1], p);
637 colors[2] = spu_insert(blue, colors[2], p);
638 colors[3] = spu_insert(alpha, colors[3], p);
639 }
640 }
641 }