cell: update comments
[mesa.git] / src / gallium / drivers / cell / spu / spu_texture.c
1 /**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include <math.h>
30
31 #include "pipe/p_compiler.h"
32 #include "spu_main.h"
33 #include "spu_texture.h"
34 #include "spu_tile.h"
35 #include "spu_colorpack.h"
36 #include "spu_dcache.h"
37
38
39 /**
40 * Mark all tex cache entries as invalid.
41 */
42 void
43 invalidate_tex_cache(void)
44 {
45 uint lvl;
46 for (lvl = 0; lvl < CELL_MAX_TEXTURE_LEVELS; lvl++) {
47 uint unit = 0;
48 uint bytes = 4 * spu.texture[unit].level[lvl].width
49 * spu.texture[unit].level[lvl].height;
50
51 if (spu.texture[unit].target == PIPE_TEXTURE_CUBE)
52 bytes *= 6;
53 else if (spu.texture[unit].target == PIPE_TEXTURE_3D)
54 bytes *= spu.texture[unit].level[lvl].depth;
55
56 spu_dcache_mark_dirty((unsigned) spu.texture[unit].level[lvl].start, bytes);
57 }
58 }
59
60
61 /**
62 * Get four texels from locations (x[0], y[0]), (x[1], y[1]) ...
63 *
64 * NOTE: in the typical case of bilinear filtering, the four texels
65 * are in a 2x2 group so we could get by with just two dcache fetches
66 * (two side-by-side texels per fetch). But when bilinear filtering
67 * wraps around a texture edge, we'll probably need code like we have
68 * now.
69 * FURTHERMORE: since we're rasterizing a quad of 2x2 pixels at a time,
70 * it's quite likely that the four pixels in a quad will need some of the
71 * same texels. So look into doing texture fetches for four pixels at
72 * a time.
73 */
74 static void
75 get_four_texels(uint unit, uint level, uint face, vec_int4 x, vec_int4 y,
76 vec_uint4 *texels)
77 {
78 const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
79 unsigned texture_ea = (uintptr_t) tlevel->start;
80 const vec_int4 tile_x = spu_rlmask(x, -5); /* tile_x = x / 32 */
81 const vec_int4 tile_y = spu_rlmask(y, -5); /* tile_y = y / 32 */
82 const qword offset_x = si_andi((qword) x, 0x1f); /* offset_x = x & 0x1f */
83 const qword offset_y = si_andi((qword) y, 0x1f); /* offset_y = y & 0x1f */
84
85 const qword tiles_per_row = (qword) spu_splats(tlevel->tiles_per_row);
86 const qword tile_size = (qword) spu_splats((unsigned) sizeof(tile_t));
87
88 qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x);
89 tile_offset = si_mpy((qword) tile_offset, tile_size);
90
91 qword texel_offset = si_a(si_mpyui(offset_y, 32), offset_x);
92 texel_offset = si_mpyui(texel_offset, 4);
93
94 vec_uint4 offset = (vec_uint4) si_a(tile_offset, texel_offset);
95
96 texture_ea = texture_ea + face * tlevel->bytes_per_image;
97
98 spu_dcache_fetch_unaligned((qword *) & texels[0],
99 texture_ea + spu_extract(offset, 0), 4);
100 spu_dcache_fetch_unaligned((qword *) & texels[1],
101 texture_ea + spu_extract(offset, 1), 4);
102 spu_dcache_fetch_unaligned((qword *) & texels[2],
103 texture_ea + spu_extract(offset, 2), 4);
104 spu_dcache_fetch_unaligned((qword *) & texels[3],
105 texture_ea + spu_extract(offset, 3), 4);
106 }
107
108
109 /** clamp vec to [0, max] */
110 static INLINE vector signed int
111 spu_clamp(vector signed int vec, vector signed int max)
112 {
113 static const vector signed int zero = {0,0,0,0};
114 vector unsigned int c;
115 c = spu_cmpgt(vec, zero); /* c = vec > zero ? ~0 : 0 */
116 vec = spu_sel(zero, vec, c);
117 c = spu_cmpgt(vec, max); /* c = vec > max ? ~0 : 0 */
118 vec = spu_sel(vec, max, c);
119 return vec;
120 }
121
122
123
124 /**
125 * Do nearest texture sampling for four pixels.
126 * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa).
127 */
128 void
129 sample_texture_2d_nearest(vector float s, vector float t,
130 uint unit, uint level, uint face,
131 vector float colors[4])
132 {
133 const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
134 vector float ss = spu_mul(s, tlevel->scale_s);
135 vector float tt = spu_mul(t, tlevel->scale_t);
136 vector signed int is = spu_convts(ss, 0);
137 vector signed int it = spu_convts(tt, 0);
138 vec_uint4 texels[4];
139
140 /* PIPE_TEX_WRAP_REPEAT */
141 is = spu_and(is, tlevel->mask_s);
142 it = spu_and(it, tlevel->mask_t);
143
144 /* PIPE_TEX_WRAP_CLAMP */
145 is = spu_clamp(is, tlevel->max_s);
146 it = spu_clamp(it, tlevel->max_t);
147
148 get_four_texels(unit, level, face, is, it, texels);
149
150 /* convert four packed ARGBA pixels to float RRRR,GGGG,BBBB,AAAA */
151 spu_unpack_A8R8G8B8_transpose4(texels, colors);
152 }
153
154
155 /**
156 * Do bilinear texture sampling for four pixels.
157 * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa).
158 */
159 void
160 sample_texture_2d_bilinear(vector float s, vector float t,
161 uint unit, uint level, uint face,
162 vector float colors[4])
163 {
164 const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
165 static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f};
166
167 vector float ss = spu_madd(s, tlevel->scale_s, half);
168 vector float tt = spu_madd(t, tlevel->scale_t, half);
169
170 vector signed int is0 = spu_convts(ss, 0);
171 vector signed int it0 = spu_convts(tt, 0);
172
173 /* is + 1, it + 1 */
174 vector signed int is1 = spu_add(is0, 1);
175 vector signed int it1 = spu_add(it0, 1);
176
177 /* PIPE_TEX_WRAP_REPEAT */
178 is0 = spu_and(is0, tlevel->mask_s);
179 it0 = spu_and(it0, tlevel->mask_t);
180 is1 = spu_and(is1, tlevel->mask_s);
181 it1 = spu_and(it1, tlevel->mask_t);
182
183 /* PIPE_TEX_WRAP_CLAMP */
184 is0 = spu_clamp(is0, tlevel->max_s);
185 it0 = spu_clamp(it0, tlevel->max_t);
186 is1 = spu_clamp(is1, tlevel->max_s);
187 it1 = spu_clamp(it1, tlevel->max_t);
188
189 /* get packed int texels */
190 vector unsigned int texels[16];
191 get_four_texels(unit, level, face, is0, it0, texels + 0); /* upper-left */
192 get_four_texels(unit, level, face, is1, it0, texels + 4); /* upper-right */
193 get_four_texels(unit, level, face, is0, it1, texels + 8); /* lower-left */
194 get_four_texels(unit, level, face, is1, it1, texels + 12); /* lower-right */
195
196 /* convert packed int texels to float colors */
197 vector float ftexels[16];
198 spu_unpack_A8R8G8B8_transpose4(texels + 0, ftexels + 0);
199 spu_unpack_A8R8G8B8_transpose4(texels + 4, ftexels + 4);
200 spu_unpack_A8R8G8B8_transpose4(texels + 8, ftexels + 8);
201 spu_unpack_A8R8G8B8_transpose4(texels + 12, ftexels + 12);
202
203 /* Compute weighting factors in [0,1]
204 * Multiply texcoord by 1024, AND with 1023, convert back to float.
205 */
206 vector float ss1024 = spu_mul(ss, spu_splats(1024.0f));
207 vector signed int iss1024 = spu_convts(ss1024, 0);
208 iss1024 = spu_and(iss1024, 1023);
209 vector float sWeights0 = spu_convtf(iss1024, 10);
210
211 vector float tt1024 = spu_mul(tt, spu_splats(1024.0f));
212 vector signed int itt1024 = spu_convts(tt1024, 0);
213 itt1024 = spu_and(itt1024, 1023);
214 vector float tWeights0 = spu_convtf(itt1024, 10);
215
216 /* 1 - sWeight and 1 - tWeight */
217 vector float sWeights1 = spu_sub(spu_splats(1.0f), sWeights0);
218 vector float tWeights1 = spu_sub(spu_splats(1.0f), tWeights0);
219
220 /* reds, for four pixels */
221 ftexels[ 0] = spu_mul(ftexels[ 0], spu_mul(sWeights1, tWeights1)); /*ul*/
222 ftexels[ 4] = spu_mul(ftexels[ 4], spu_mul(sWeights0, tWeights1)); /*ur*/
223 ftexels[ 8] = spu_mul(ftexels[ 8], spu_mul(sWeights1, tWeights0)); /*ll*/
224 ftexels[12] = spu_mul(ftexels[12], spu_mul(sWeights0, tWeights0)); /*lr*/
225 colors[0] = spu_add(spu_add(ftexels[0], ftexels[4]),
226 spu_add(ftexels[8], ftexels[12]));
227
228 /* greens, for four pixels */
229 ftexels[ 1] = spu_mul(ftexels[ 1], spu_mul(sWeights1, tWeights1)); /*ul*/
230 ftexels[ 5] = spu_mul(ftexels[ 5], spu_mul(sWeights0, tWeights1)); /*ur*/
231 ftexels[ 9] = spu_mul(ftexels[ 9], spu_mul(sWeights1, tWeights0)); /*ll*/
232 ftexels[13] = spu_mul(ftexels[13], spu_mul(sWeights0, tWeights0)); /*lr*/
233 colors[1] = spu_add(spu_add(ftexels[1], ftexels[5]),
234 spu_add(ftexels[9], ftexels[13]));
235
236 /* blues, for four pixels */
237 ftexels[ 2] = spu_mul(ftexels[ 2], spu_mul(sWeights1, tWeights1)); /*ul*/
238 ftexels[ 6] = spu_mul(ftexels[ 6], spu_mul(sWeights0, tWeights1)); /*ur*/
239 ftexels[10] = spu_mul(ftexels[10], spu_mul(sWeights1, tWeights0)); /*ll*/
240 ftexels[14] = spu_mul(ftexels[14], spu_mul(sWeights0, tWeights0)); /*lr*/
241 colors[2] = spu_add(spu_add(ftexels[2], ftexels[6]),
242 spu_add(ftexels[10], ftexels[14]));
243
244 /* alphas, for four pixels */
245 ftexels[ 3] = spu_mul(ftexels[ 3], spu_mul(sWeights1, tWeights1)); /*ul*/
246 ftexels[ 7] = spu_mul(ftexels[ 7], spu_mul(sWeights0, tWeights1)); /*ur*/
247 ftexels[11] = spu_mul(ftexels[11], spu_mul(sWeights1, tWeights0)); /*ll*/
248 ftexels[15] = spu_mul(ftexels[15], spu_mul(sWeights0, tWeights0)); /*lr*/
249 colors[3] = spu_add(spu_add(ftexels[3], ftexels[7]),
250 spu_add(ftexels[11], ftexels[15]));
251 }
252
253
254
255 /**
256 * Adapted from /opt/cell/sdk/usr/spu/include/transpose_matrix4x4.h
257 */
258 static INLINE void
259 transpose(vector unsigned int *mOut0,
260 vector unsigned int *mOut1,
261 vector unsigned int *mOut2,
262 vector unsigned int *mOut3,
263 vector unsigned int *mIn)
264 {
265 vector unsigned int abcd, efgh, ijkl, mnop; /* input vectors */
266 vector unsigned int aeim, bfjn, cgko, dhlp; /* output vectors */
267 vector unsigned int aibj, ckdl, emfn, gohp; /* intermediate vectors */
268
269 vector unsigned char shufflehi = ((vector unsigned char) {
270 0x00, 0x01, 0x02, 0x03,
271 0x10, 0x11, 0x12, 0x13,
272 0x04, 0x05, 0x06, 0x07,
273 0x14, 0x15, 0x16, 0x17});
274 vector unsigned char shufflelo = ((vector unsigned char) {
275 0x08, 0x09, 0x0A, 0x0B,
276 0x18, 0x19, 0x1A, 0x1B,
277 0x0C, 0x0D, 0x0E, 0x0F,
278 0x1C, 0x1D, 0x1E, 0x1F});
279 abcd = *(mIn+0);
280 efgh = *(mIn+1);
281 ijkl = *(mIn+2);
282 mnop = *(mIn+3);
283
284 aibj = spu_shuffle(abcd, ijkl, shufflehi);
285 ckdl = spu_shuffle(abcd, ijkl, shufflelo);
286 emfn = spu_shuffle(efgh, mnop, shufflehi);
287 gohp = spu_shuffle(efgh, mnop, shufflelo);
288
289 aeim = spu_shuffle(aibj, emfn, shufflehi);
290 bfjn = spu_shuffle(aibj, emfn, shufflelo);
291 cgko = spu_shuffle(ckdl, gohp, shufflehi);
292 dhlp = spu_shuffle(ckdl, gohp, shufflelo);
293
294 *mOut0 = aeim;
295 *mOut1 = bfjn;
296 *mOut2 = cgko;
297 *mOut3 = dhlp;
298 }
299
300
301 /**
302 * Bilinear filtering, using int instead of float arithmetic for computing
303 * sample weights.
304 */
305 void
306 sample_texture_2d_bilinear_int(vector float s, vector float t,
307 uint unit, uint level, uint face,
308 vector float colors[4])
309 {
310 const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
311 static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f};
312
313 /* Scale texcoords by size of texture, and add half pixel bias */
314 vector float ss = spu_madd(s, tlevel->scale_s, half);
315 vector float tt = spu_madd(t, tlevel->scale_t, half);
316
317 /* convert float coords to fixed-pt coords with 8 fraction bits */
318 vector signed int is = spu_convts(ss, 8);
319 vector signed int it = spu_convts(tt, 8);
320
321 /* compute integer texel weights in [0, 255] */
322 vector signed int sWeights0 = spu_and(is, 255);
323 vector signed int tWeights0 = spu_and(it, 255);
324 vector signed int sWeights1 = spu_sub(255, sWeights0);
325 vector signed int tWeights1 = spu_sub(255, tWeights0);
326
327 /* texel coords: is0 = is / 256, it0 = is / 256 */
328 vector signed int is0 = spu_rlmask(is, -8);
329 vector signed int it0 = spu_rlmask(it, -8);
330
331 /* texel coords: i1 = is0 + 1, it1 = it0 + 1 */
332 vector signed int is1 = spu_add(is0, 1);
333 vector signed int it1 = spu_add(it0, 1);
334
335 /* PIPE_TEX_WRAP_REPEAT */
336 is0 = spu_and(is0, tlevel->mask_s);
337 it0 = spu_and(it0, tlevel->mask_t);
338 is1 = spu_and(is1, tlevel->mask_s);
339 it1 = spu_and(it1, tlevel->mask_t);
340
341 /* PIPE_TEX_WRAP_CLAMP */
342 is0 = spu_clamp(is0, tlevel->max_s);
343 it0 = spu_clamp(it0, tlevel->max_t);
344 is1 = spu_clamp(is1, tlevel->max_s);
345 it1 = spu_clamp(it1, tlevel->max_t);
346
347 /* get packed int texels */
348 vector unsigned int texels[16];
349 get_four_texels(unit, level, face, is0, it0, texels + 0); /* upper-left */
350 get_four_texels(unit, level, face, is1, it0, texels + 4); /* upper-right */
351 get_four_texels(unit, level, face, is0, it1, texels + 8); /* lower-left */
352 get_four_texels(unit, level, face, is1, it1, texels + 12); /* lower-right */
353
354 /* twiddle packed 32-bit BGRA pixels into RGBA as four unsigned ints */
355 {
356 static const unsigned char ZERO = 0x80;
357 int i;
358 for (i = 0; i < 16; i++) {
359 texels[i] = spu_shuffle(texels[i], texels[i],
360 ((vector unsigned char) {
361 ZERO, ZERO, ZERO, 1,
362 ZERO, ZERO, ZERO, 2,
363 ZERO, ZERO, ZERO, 3,
364 ZERO, ZERO, ZERO, 0}));
365 }
366 }
367
368 /* convert RGBA,RGBA,RGBA,RGBA to RRRR,GGGG,BBBB,AAAA */
369 vector unsigned int texel0, texel1, texel2, texel3, texel4, texel5, texel6, texel7,
370 texel8, texel9, texel10, texel11, texel12, texel13, texel14, texel15;
371 transpose(&texel0, &texel1, &texel2, &texel3, texels + 0);
372 transpose(&texel4, &texel5, &texel6, &texel7, texels + 4);
373 transpose(&texel8, &texel9, &texel10, &texel11, texels + 8);
374 transpose(&texel12, &texel13, &texel14, &texel15, texels + 12);
375
376 /* computed weighted colors */
377 vector unsigned int c0, c1, c2, c3, cSum;
378
379 /* red */
380 c0 = (vector unsigned int) si_mpyu((qword) texel0, si_mpyu((qword) sWeights1, (qword) tWeights1)); /*ul*/
381 c1 = (vector unsigned int) si_mpyu((qword) texel4, si_mpyu((qword) sWeights0, (qword) tWeights1)); /*ur*/
382 c2 = (vector unsigned int) si_mpyu((qword) texel8, si_mpyu((qword) sWeights1, (qword) tWeights0)); /*ll*/
383 c3 = (vector unsigned int) si_mpyu((qword) texel12, si_mpyu((qword) sWeights0, (qword) tWeights0)); /*lr*/
384 cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
385 colors[0] = spu_convtf(cSum, 24);
386
387 /* green */
388 c0 = (vector unsigned int) si_mpyu((qword) texel1, si_mpyu((qword) sWeights1, (qword) tWeights1)); /*ul*/
389 c1 = (vector unsigned int) si_mpyu((qword) texel5, si_mpyu((qword) sWeights0, (qword) tWeights1)); /*ur*/
390 c2 = (vector unsigned int) si_mpyu((qword) texel9, si_mpyu((qword) sWeights1, (qword) tWeights0)); /*ll*/
391 c3 = (vector unsigned int) si_mpyu((qword) texel13, si_mpyu((qword) sWeights0, (qword) tWeights0)); /*lr*/
392 cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
393 colors[1] = spu_convtf(cSum, 24);
394
395 /* blue */
396 c0 = (vector unsigned int) si_mpyu((qword) texel2, si_mpyu((qword) sWeights1, (qword) tWeights1)); /*ul*/
397 c1 = (vector unsigned int) si_mpyu((qword) texel6, si_mpyu((qword) sWeights0, (qword) tWeights1)); /*ur*/
398 c2 = (vector unsigned int) si_mpyu((qword) texel10, si_mpyu((qword) sWeights1, (qword) tWeights0)); /*ll*/
399 c3 = (vector unsigned int) si_mpyu((qword) texel14, si_mpyu((qword) sWeights0, (qword) tWeights0)); /*lr*/
400 cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
401 colors[2] = spu_convtf(cSum, 24);
402
403 /* alpha */
404 c0 = (vector unsigned int) si_mpyu((qword) texel3, si_mpyu((qword) sWeights1, (qword) tWeights1)); /*ul*/
405 c1 = (vector unsigned int) si_mpyu((qword) texel7, si_mpyu((qword) sWeights0, (qword) tWeights1)); /*ur*/
406 c2 = (vector unsigned int) si_mpyu((qword) texel11, si_mpyu((qword) sWeights1, (qword) tWeights0)); /*ll*/
407 c3 = (vector unsigned int) si_mpyu((qword) texel15, si_mpyu((qword) sWeights0, (qword) tWeights0)); /*lr*/
408 cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
409 colors[3] = spu_convtf(cSum, 24);
410 }
411
412
413
414 /**
415 * Compute level of detail factor from texcoords.
416 */
417 static float
418 compute_lambda(uint unit, vector float s, vector float t)
419 {
420 uint baseLevel = 0;
421 float width = spu.texture[unit].level[baseLevel].width;
422 float height = spu.texture[unit].level[baseLevel].width;
423 float dsdx = width * (spu_extract(s, 1) - spu_extract(s, 0));
424 float dsdy = width * (spu_extract(s, 2) - spu_extract(s, 0));
425 float dtdx = height * (spu_extract(t, 1) - spu_extract(t, 0));
426 float dtdy = height * (spu_extract(t, 2) - spu_extract(t, 0));
427 float x = dsdx * dsdx + dtdx * dtdx;
428 float y = dsdy * dsdy + dtdy * dtdy;
429 float rho = x > y ? x : y;
430 rho = sqrtf(rho);
431 float lambda = logf(rho) * 1.442695f;
432 return lambda;
433 }
434
435
436
437 /**
438 * Texture sampling with level of detail selection.
439 */
440 void
441 sample_texture_2d_lod(vector float s, vector float t,
442 uint unit, uint level_ignored, uint face,
443 vector float colors[4])
444 {
445 /*
446 * Note that we're computing a lambda/lod here that's used for all
447 * four pixels in the quad.
448 */
449 float lambda = compute_lambda(unit, s, t);
450
451 (void) face;
452 (void) level_ignored;
453
454 /* apply lod bias */
455 lambda += spu.sampler[unit].lod_bias;
456
457 /* clamp */
458 if (lambda < spu.sampler[unit].min_lod)
459 lambda = spu.sampler[unit].min_lod;
460 else if (lambda > spu.sampler[unit].max_lod)
461 lambda = spu.sampler[unit].max_lod;
462
463 if (lambda <= 0.0f) {
464 /* magnify */
465 spu.mag_sample_texture_2d[unit](s, t, unit, 0, 0, colors);
466 }
467 else {
468 /* minify */
469 int level = (int) (lambda + 0.5f);
470 if (level > (int) spu.texture[unit].max_level)
471 level = spu.texture[unit].max_level;
472 spu.min_sample_texture_2d[unit](s, t, unit, level, 0, colors);
473 /* XXX to do: mipmap level interpolation */
474 }
475 }
476
477
478 /** XXX need a SIMD version of this */
479 static unsigned
480 choose_cube_face(float rx, float ry, float rz, float *newS, float *newT)
481 {
482 /*
483 major axis
484 direction target sc tc ma
485 ---------- ------------------------------- --- --- ---
486 +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx
487 -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx
488 +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry
489 -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry
490 +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz
491 -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz
492 */
493 const float arx = fabsf(rx);
494 const float ary = fabsf(ry);
495 const float arz = fabsf(rz);
496 unsigned face;
497 float sc, tc, ma;
498
499 if (arx > ary && arx > arz) {
500 if (rx >= 0.0F) {
501 face = PIPE_TEX_FACE_POS_X;
502 sc = -rz;
503 tc = -ry;
504 ma = arx;
505 }
506 else {
507 face = PIPE_TEX_FACE_NEG_X;
508 sc = rz;
509 tc = -ry;
510 ma = arx;
511 }
512 }
513 else if (ary > arx && ary > arz) {
514 if (ry >= 0.0F) {
515 face = PIPE_TEX_FACE_POS_Y;
516 sc = rx;
517 tc = rz;
518 ma = ary;
519 }
520 else {
521 face = PIPE_TEX_FACE_NEG_Y;
522 sc = rx;
523 tc = -rz;
524 ma = ary;
525 }
526 }
527 else {
528 if (rz > 0.0F) {
529 face = PIPE_TEX_FACE_POS_Z;
530 sc = rx;
531 tc = -ry;
532 ma = arz;
533 }
534 else {
535 face = PIPE_TEX_FACE_NEG_Z;
536 sc = -rx;
537 tc = -ry;
538 ma = arz;
539 }
540 }
541
542 *newS = (sc / ma + 1.0F) * 0.5F;
543 *newT = (tc / ma + 1.0F) * 0.5F;
544
545 return face;
546 }
547
548
549
550 void
551 sample_texture_cube(vector float s, vector float t, vector float r,
552 uint unit, vector float colors[4])
553 {
554 uint p, faces[4], level = 0;
555 float newS[4], newT[4];
556
557 /* Compute cube faces referenced by the four sets of texcoords.
558 * XXX we should SIMD-ize this.
559 */
560 for (p = 0; p < 4; p++) {
561 float rx = spu_extract(s, p);
562 float ry = spu_extract(t, p);
563 float rz = spu_extract(r, p);
564 faces[p] = choose_cube_face(rx, ry, rz, &newS[p], &newT[p]);
565 }
566
567 if (faces[0] == faces[1] &&
568 faces[0] == faces[2] &&
569 faces[0] == faces[3]) {
570 /* GOOD! All four texcoords refer to the same cube face */
571 s = (vector float) {newS[0], newS[1], newS[2], newS[3]};
572 t = (vector float) {newT[0], newT[1], newT[2], newT[3]};
573 spu.sample_texture_2d[unit](s, t, unit, level, faces[0], colors);
574 }
575 else {
576 /* BAD! The four texcoords refer to different faces */
577 for (p = 0; p < 4; p++) {
578 vector float c[4];
579
580 spu.sample_texture_2d[unit](spu_splats(newS[p]), spu_splats(newT[p]),
581 unit, level, faces[p], c);
582
583 float red = spu_extract(c[0], p);
584 float green = spu_extract(c[1], p);
585 float blue = spu_extract(c[2], p);
586 float alpha = spu_extract(c[3], p);
587
588 colors[0] = spu_insert(red, colors[0], p);
589 colors[1] = spu_insert(green, colors[1], p);
590 colors[2] = spu_insert(blue, colors[2], p);
591 colors[3] = spu_insert(alpha, colors[3], p);
592 }
593 }
594 }