Merge commit 'origin/gallium-0.1' into gallium-0.2
[mesa.git] / src / gallium / drivers / cell / spu / spu_texture.c
1 /**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include <math.h>
30
31 #include "pipe/p_compiler.h"
32 #include "spu_main.h"
33 #include "spu_texture.h"
34 #include "spu_tile.h"
35 #include "spu_colorpack.h"
36 #include "spu_dcache.h"
37
38
39 /**
40 * Mark all tex cache entries as invalid.
41 */
42 void
43 invalidate_tex_cache(void)
44 {
45 uint lvl;
46 for (lvl = 0; lvl < CELL_MAX_TEXTURE_LEVELS; lvl++) {
47 uint unit = 0;
48 uint bytes = 4 * spu.texture[unit].level[lvl].width
49 * spu.texture[unit].level[lvl].height;
50
51 if (spu.texture[unit].target == PIPE_TEXTURE_CUBE)
52 bytes *= 6;
53 else if (spu.texture[unit].target == PIPE_TEXTURE_3D)
54 bytes *= spu.texture[unit].level[lvl].depth;
55
56 spu_dcache_mark_dirty((unsigned) spu.texture[unit].level[lvl].start, bytes);
57 }
58 }
59
60
61 /**
62 * Get four texels from locations (x[0], y[0]), (x[1], y[1]) ...
63 *
64 * NOTE: in the typical case of bilinear filtering, the four texels
65 * are in a 2x2 group so we could get by with just two dcache fetches
66 * (two side-by-side texels per fetch). But when bilinear filtering
67 * wraps around a texture edge, we'll probably need code like we have
68 * now.
69 * FURTHERMORE: since we're rasterizing a quad of 2x2 pixels at a time,
70 * it's quite likely that the four pixels in a quad will need some of the
71 * same texels. So look into doing texture fetches for four pixels at
72 * a time.
73 */
74 static void
75 get_four_texels(uint unit, uint level, uint face, vec_int4 x, vec_int4 y,
76 vec_uint4 *texels)
77 {
78 const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
79 unsigned texture_ea = (uintptr_t) tlevel->start;
80 const vec_int4 tile_x = spu_rlmask(x, -5); /* tile_x = x / 32 */
81 const vec_int4 tile_y = spu_rlmask(y, -5); /* tile_y = y / 32 */
82 const qword offset_x = si_andi((qword) x, 0x1f); /* offset_x = x & 0x1f */
83 const qword offset_y = si_andi((qword) y, 0x1f); /* offset_y = y & 0x1f */
84
85 const qword tiles_per_row = (qword) spu_splats(tlevel->tiles_per_row);
86 const qword tile_size = (qword) spu_splats((unsigned) sizeof(tile_t));
87
88 qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x);
89 tile_offset = si_mpy((qword) tile_offset, tile_size);
90
91 qword texel_offset = si_a(si_mpyui(offset_y, 32), offset_x);
92 texel_offset = si_mpyui(texel_offset, 4);
93
94 vec_uint4 offset = (vec_uint4) si_a(tile_offset, texel_offset);
95
96 texture_ea = texture_ea + face * tlevel->bytes_per_image;
97
98 spu_dcache_fetch_unaligned((qword *) & texels[0],
99 texture_ea + spu_extract(offset, 0), 4);
100 spu_dcache_fetch_unaligned((qword *) & texels[1],
101 texture_ea + spu_extract(offset, 1), 4);
102 spu_dcache_fetch_unaligned((qword *) & texels[2],
103 texture_ea + spu_extract(offset, 2), 4);
104 spu_dcache_fetch_unaligned((qword *) & texels[3],
105 texture_ea + spu_extract(offset, 3), 4);
106 }
107
108
109 /** clamp vec to [0, max] */
110 static INLINE vector signed int
111 spu_clamp(vector signed int vec, vector signed int max)
112 {
113 static const vector signed int zero = {0,0,0,0};
114 vector unsigned int c;
115 c = spu_cmpgt(vec, zero); /* c = vec > zero ? ~0 : 0 */
116 vec = spu_sel(zero, vec, c);
117 c = spu_cmpgt(vec, max); /* c = vec > max ? ~0 : 0 */
118 vec = spu_sel(vec, max, c);
119 return vec;
120 }
121
122
123
124 /**
125 * Do nearest texture sampling for four pixels.
126 * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa).
127 */
128 void
129 sample_texture4_nearest(vector float s, vector float t,
130 vector float r, vector float q,
131 uint unit, uint level, uint face,
132 vector float colors[4])
133 {
134 const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
135 vector float ss = spu_mul(s, tlevel->scale_s);
136 vector float tt = spu_mul(t, tlevel->scale_t);
137 vector signed int is = spu_convts(ss, 0);
138 vector signed int it = spu_convts(tt, 0);
139 vec_uint4 texels[4];
140
141 /* PIPE_TEX_WRAP_REPEAT */
142 is = spu_and(is, tlevel->mask_s);
143 it = spu_and(it, tlevel->mask_t);
144
145 /* PIPE_TEX_WRAP_CLAMP */
146 is = spu_clamp(is, tlevel->max_s);
147 it = spu_clamp(it, tlevel->max_t);
148
149 get_four_texels(unit, level, face, is, it, texels);
150
151 /* convert four packed ARGBA pixels to float RRRR,GGGG,BBBB,AAAA */
152 spu_unpack_A8R8G8B8_transpose4(texels, colors);
153 }
154
155
156 /**
157 * Do bilinear texture sampling for four pixels.
158 * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa).
159 */
160 void
161 sample_texture4_bilinear(vector float s, vector float t,
162 vector float r, vector float q,
163 uint unit, uint level, uint face,
164 vector float colors[4])
165 {
166 const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
167 static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f};
168
169 vector float ss = spu_madd(s, tlevel->scale_s, half);
170 vector float tt = spu_madd(t, tlevel->scale_t, half);
171
172 vector signed int is0 = spu_convts(ss, 0);
173 vector signed int it0 = spu_convts(tt, 0);
174
175 /* is + 1, it + 1 */
176 vector signed int is1 = spu_add(is0, 1);
177 vector signed int it1 = spu_add(it0, 1);
178
179 /* PIPE_TEX_WRAP_REPEAT */
180 is0 = spu_and(is0, tlevel->mask_s);
181 it0 = spu_and(it0, tlevel->mask_t);
182 is1 = spu_and(is1, tlevel->mask_s);
183 it1 = spu_and(it1, tlevel->mask_t);
184
185 /* PIPE_TEX_WRAP_CLAMP */
186 is0 = spu_clamp(is0, tlevel->max_s);
187 it0 = spu_clamp(it0, tlevel->max_t);
188 is1 = spu_clamp(is1, tlevel->max_s);
189 it1 = spu_clamp(it1, tlevel->max_t);
190
191 /* get packed int texels */
192 vector unsigned int texels[16];
193 get_four_texels(unit, level, face, is0, it0, texels + 0); /* upper-left */
194 get_four_texels(unit, level, face, is1, it0, texels + 4); /* upper-right */
195 get_four_texels(unit, level, face, is0, it1, texels + 8); /* lower-left */
196 get_four_texels(unit, level, face, is1, it1, texels + 12); /* lower-right */
197
198 /* XXX possibly rework following code to compute the weighted sample
199 * colors with integer arithmetic for fewer int->float conversions.
200 */
201
202 /* convert packed int texels to float colors */
203 vector float ftexels[16];
204 spu_unpack_A8R8G8B8_transpose4(texels + 0, ftexels + 0);
205 spu_unpack_A8R8G8B8_transpose4(texels + 4, ftexels + 4);
206 spu_unpack_A8R8G8B8_transpose4(texels + 8, ftexels + 8);
207 spu_unpack_A8R8G8B8_transpose4(texels + 12, ftexels + 12);
208
209 /* Compute weighting factors in [0,1]
210 * Multiply texcoord by 1024, AND with 1023, convert back to float.
211 */
212 vector float ss1024 = spu_mul(ss, spu_splats(1024.0f));
213 vector signed int iss1024 = spu_convts(ss1024, 0);
214 iss1024 = spu_and(iss1024, 1023);
215 vector float sWeights0 = spu_convtf(iss1024, 10);
216
217 vector float tt1024 = spu_mul(tt, spu_splats(1024.0f));
218 vector signed int itt1024 = spu_convts(tt1024, 0);
219 itt1024 = spu_and(itt1024, 1023);
220 vector float tWeights0 = spu_convtf(itt1024, 10);
221
222 /* 1 - sWeight and 1 - tWeight */
223 vector float sWeights1 = spu_sub(spu_splats(1.0f), sWeights0);
224 vector float tWeights1 = spu_sub(spu_splats(1.0f), tWeights0);
225
226 /* reds, for four pixels */
227 ftexels[ 0] = spu_mul(ftexels[ 0], spu_mul(sWeights1, tWeights1)); /*ul*/
228 ftexels[ 4] = spu_mul(ftexels[ 4], spu_mul(sWeights0, tWeights1)); /*ur*/
229 ftexels[ 8] = spu_mul(ftexels[ 8], spu_mul(sWeights1, tWeights0)); /*ll*/
230 ftexels[12] = spu_mul(ftexels[12], spu_mul(sWeights0, tWeights0)); /*lr*/
231 colors[0] = spu_add(spu_add(ftexels[0], ftexels[4]),
232 spu_add(ftexels[8], ftexels[12]));
233
234 /* greens, for four pixels */
235 ftexels[ 1] = spu_mul(ftexels[ 1], spu_mul(sWeights1, tWeights1)); /*ul*/
236 ftexels[ 5] = spu_mul(ftexels[ 5], spu_mul(sWeights0, tWeights1)); /*ur*/
237 ftexels[ 9] = spu_mul(ftexels[ 9], spu_mul(sWeights1, tWeights0)); /*ll*/
238 ftexels[13] = spu_mul(ftexels[13], spu_mul(sWeights0, tWeights0)); /*lr*/
239 colors[1] = spu_add(spu_add(ftexels[1], ftexels[5]),
240 spu_add(ftexels[9], ftexels[13]));
241
242 /* blues, for four pixels */
243 ftexels[ 2] = spu_mul(ftexels[ 2], spu_mul(sWeights1, tWeights1)); /*ul*/
244 ftexels[ 6] = spu_mul(ftexels[ 6], spu_mul(sWeights0, tWeights1)); /*ur*/
245 ftexels[10] = spu_mul(ftexels[10], spu_mul(sWeights1, tWeights0)); /*ll*/
246 ftexels[14] = spu_mul(ftexels[14], spu_mul(sWeights0, tWeights0)); /*lr*/
247 colors[2] = spu_add(spu_add(ftexels[2], ftexels[6]),
248 spu_add(ftexels[10], ftexels[14]));
249
250 /* alphas, for four pixels */
251 ftexels[ 3] = spu_mul(ftexels[ 3], spu_mul(sWeights1, tWeights1)); /*ul*/
252 ftexels[ 7] = spu_mul(ftexels[ 7], spu_mul(sWeights0, tWeights1)); /*ur*/
253 ftexels[11] = spu_mul(ftexels[11], spu_mul(sWeights1, tWeights0)); /*ll*/
254 ftexels[15] = spu_mul(ftexels[15], spu_mul(sWeights0, tWeights0)); /*lr*/
255 colors[3] = spu_add(spu_add(ftexels[3], ftexels[7]),
256 spu_add(ftexels[11], ftexels[15]));
257 }
258
259
260
261 /**
262 * Adapted from /opt/cell/sdk/usr/spu/include/transpose_matrix4x4.h
263 */
264 static INLINE void
265 transpose(vector unsigned int *mOut0,
266 vector unsigned int *mOut1,
267 vector unsigned int *mOut2,
268 vector unsigned int *mOut3,
269 vector unsigned int *mIn)
270 {
271 vector unsigned int abcd, efgh, ijkl, mnop; /* input vectors */
272 vector unsigned int aeim, bfjn, cgko, dhlp; /* output vectors */
273 vector unsigned int aibj, ckdl, emfn, gohp; /* intermediate vectors */
274
275 vector unsigned char shufflehi = ((vector unsigned char) {
276 0x00, 0x01, 0x02, 0x03,
277 0x10, 0x11, 0x12, 0x13,
278 0x04, 0x05, 0x06, 0x07,
279 0x14, 0x15, 0x16, 0x17});
280 vector unsigned char shufflelo = ((vector unsigned char) {
281 0x08, 0x09, 0x0A, 0x0B,
282 0x18, 0x19, 0x1A, 0x1B,
283 0x0C, 0x0D, 0x0E, 0x0F,
284 0x1C, 0x1D, 0x1E, 0x1F});
285 abcd = *(mIn+0);
286 efgh = *(mIn+1);
287 ijkl = *(mIn+2);
288 mnop = *(mIn+3);
289
290 aibj = spu_shuffle(abcd, ijkl, shufflehi);
291 ckdl = spu_shuffle(abcd, ijkl, shufflelo);
292 emfn = spu_shuffle(efgh, mnop, shufflehi);
293 gohp = spu_shuffle(efgh, mnop, shufflelo);
294
295 aeim = spu_shuffle(aibj, emfn, shufflehi);
296 bfjn = spu_shuffle(aibj, emfn, shufflelo);
297 cgko = spu_shuffle(ckdl, gohp, shufflehi);
298 dhlp = spu_shuffle(ckdl, gohp, shufflelo);
299
300 *mOut0 = aeim;
301 *mOut1 = bfjn;
302 *mOut2 = cgko;
303 *mOut3 = dhlp;
304 }
305
306
307 /**
308 * Bilinear filtering, using int intead of float arithmetic
309 */
310 void
311 sample_texture4_bilinear_2(vector float s, vector float t,
312 vector float r, vector float q,
313 uint unit, uint level, uint face,
314 vector float colors[4])
315 {
316 const struct spu_texture_level *tlevel = &spu.texture[unit].level[level];
317 static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f};
318
319 /* Scale texcoords by size of texture, and add half pixel bias */
320 vector float ss = spu_madd(s, tlevel->scale_s, half);
321 vector float tt = spu_madd(t, tlevel->scale_t, half);
322
323 /* convert float coords to fixed-pt coords with 8 fraction bits */
324 vector signed int is = spu_convts(ss, 8);
325 vector signed int it = spu_convts(tt, 8);
326
327 /* compute integer texel weights in [0, 255] */
328 vector signed int sWeights0 = spu_and(is, 255);
329 vector signed int tWeights0 = spu_and(it, 255);
330 vector signed int sWeights1 = spu_sub(255, sWeights0);
331 vector signed int tWeights1 = spu_sub(255, tWeights0);
332
333 /* texel coords: is0 = is / 256, it0 = is / 256 */
334 vector signed int is0 = spu_rlmask(is, -8);
335 vector signed int it0 = spu_rlmask(it, -8);
336
337 /* texel coords: i1 = is0 + 1, it1 = it0 + 1 */
338 vector signed int is1 = spu_add(is0, 1);
339 vector signed int it1 = spu_add(it0, 1);
340
341 /* PIPE_TEX_WRAP_REPEAT */
342 is0 = spu_and(is0, tlevel->mask_s);
343 it0 = spu_and(it0, tlevel->mask_t);
344 is1 = spu_and(is1, tlevel->mask_s);
345 it1 = spu_and(it1, tlevel->mask_t);
346
347 /* PIPE_TEX_WRAP_CLAMP */
348 is0 = spu_clamp(is0, tlevel->max_s);
349 it0 = spu_clamp(it0, tlevel->max_t);
350 is1 = spu_clamp(is1, tlevel->max_s);
351 it1 = spu_clamp(it1, tlevel->max_t);
352
353 /* get packed int texels */
354 vector unsigned int texels[16];
355 get_four_texels(unit, level, face, is0, it0, texels + 0); /* upper-left */
356 get_four_texels(unit, level, face, is1, it0, texels + 4); /* upper-right */
357 get_four_texels(unit, level, face, is0, it1, texels + 8); /* lower-left */
358 get_four_texels(unit, level, face, is1, it1, texels + 12); /* lower-right */
359
360 /* twiddle packed 32-bit BGRA pixels into RGBA as four unsigned ints */
361 {
362 static const unsigned char ZERO = 0x80;
363 int i;
364 for (i = 0; i < 16; i++) {
365 texels[i] = spu_shuffle(texels[i], texels[i],
366 ((vector unsigned char) {
367 ZERO, ZERO, ZERO, 1,
368 ZERO, ZERO, ZERO, 2,
369 ZERO, ZERO, ZERO, 3,
370 ZERO, ZERO, ZERO, 0}));
371 }
372 }
373
374 /* convert RGBA,RGBA,RGBA,RGBA to RRRR,GGGG,BBBB,AAAA */
375 vector unsigned int texel0, texel1, texel2, texel3, texel4, texel5, texel6, texel7,
376 texel8, texel9, texel10, texel11, texel12, texel13, texel14, texel15;
377 transpose(&texel0, &texel1, &texel2, &texel3, texels + 0);
378 transpose(&texel4, &texel5, &texel6, &texel7, texels + 4);
379 transpose(&texel8, &texel9, &texel10, &texel11, texels + 8);
380 transpose(&texel12, &texel13, &texel14, &texel15, texels + 12);
381
382 /* computed weighted colors */
383 vector unsigned int c0, c1, c2, c3, cSum;
384
385 /* red */
386 c0 = (vector unsigned int) si_mpyu((qword) texel0, si_mpyu((qword) sWeights1, (qword) tWeights1)); /*ul*/
387 c1 = (vector unsigned int) si_mpyu((qword) texel4, si_mpyu((qword) sWeights0, (qword) tWeights1)); /*ur*/
388 c2 = (vector unsigned int) si_mpyu((qword) texel8, si_mpyu((qword) sWeights1, (qword) tWeights0)); /*ll*/
389 c3 = (vector unsigned int) si_mpyu((qword) texel12, si_mpyu((qword) sWeights0, (qword) tWeights0)); /*lr*/
390 cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
391 colors[0] = spu_convtf(cSum, 24);
392
393 /* green */
394 c0 = (vector unsigned int) si_mpyu((qword) texel1, si_mpyu((qword) sWeights1, (qword) tWeights1)); /*ul*/
395 c1 = (vector unsigned int) si_mpyu((qword) texel5, si_mpyu((qword) sWeights0, (qword) tWeights1)); /*ur*/
396 c2 = (vector unsigned int) si_mpyu((qword) texel9, si_mpyu((qword) sWeights1, (qword) tWeights0)); /*ll*/
397 c3 = (vector unsigned int) si_mpyu((qword) texel13, si_mpyu((qword) sWeights0, (qword) tWeights0)); /*lr*/
398 cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
399 colors[1] = spu_convtf(cSum, 24);
400
401 /* blue */
402 c0 = (vector unsigned int) si_mpyu((qword) texel2, si_mpyu((qword) sWeights1, (qword) tWeights1)); /*ul*/
403 c1 = (vector unsigned int) si_mpyu((qword) texel6, si_mpyu((qword) sWeights0, (qword) tWeights1)); /*ur*/
404 c2 = (vector unsigned int) si_mpyu((qword) texel10, si_mpyu((qword) sWeights1, (qword) tWeights0)); /*ll*/
405 c3 = (vector unsigned int) si_mpyu((qword) texel14, si_mpyu((qword) sWeights0, (qword) tWeights0)); /*lr*/
406 cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
407 colors[2] = spu_convtf(cSum, 24);
408
409 /* alpha */
410 c0 = (vector unsigned int) si_mpyu((qword) texel3, si_mpyu((qword) sWeights1, (qword) tWeights1)); /*ul*/
411 c1 = (vector unsigned int) si_mpyu((qword) texel7, si_mpyu((qword) sWeights0, (qword) tWeights1)); /*ur*/
412 c2 = (vector unsigned int) si_mpyu((qword) texel11, si_mpyu((qword) sWeights1, (qword) tWeights0)); /*ll*/
413 c3 = (vector unsigned int) si_mpyu((qword) texel15, si_mpyu((qword) sWeights0, (qword) tWeights0)); /*lr*/
414 cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3));
415 colors[3] = spu_convtf(cSum, 24);
416 }
417
418
419
420 /**
421 * Compute level of detail factor from texcoords.
422 */
423 static float
424 compute_lambda(uint unit, vector float s, vector float t)
425 {
426 uint baseLevel = 0;
427 float width = spu.texture[unit].level[baseLevel].width;
428 float height = spu.texture[unit].level[baseLevel].width;
429 float dsdx = width * (spu_extract(s, 1) - spu_extract(s, 0));
430 float dsdy = width * (spu_extract(s, 2) - spu_extract(s, 0));
431 float dtdx = height * (spu_extract(t, 1) - spu_extract(t, 0));
432 float dtdy = height * (spu_extract(t, 2) - spu_extract(t, 0));
433 float x = dsdx * dsdx + dtdx * dtdx;
434 float y = dsdy * dsdy + dtdy * dtdy;
435 float rho = x > y ? x : y;
436 rho = sqrtf(rho);
437 float lambda = logf(rho) * 1.442695f;
438 return lambda;
439 }
440
441
442
443 /**
444 * Texture sampling with level of detail selection.
445 */
446 void
447 sample_texture4_lod(vector float s, vector float t,
448 vector float r, vector float q,
449 uint unit, uint level_ignored, uint face,
450 vector float colors[4])
451 {
452 /*
453 * Note that we're computing a lambda/lod here that's used for all
454 * four pixels in the quad.
455 */
456 float lambda = compute_lambda(unit, s, t);
457
458 /* apply lod bias */
459 lambda += spu.sampler[unit].lod_bias;
460
461 /* clamp */
462 if (lambda < spu.sampler[unit].min_lod)
463 lambda = spu.sampler[unit].min_lod;
464 else if (lambda > spu.sampler[unit].max_lod)
465 lambda = spu.sampler[unit].max_lod;
466
467 if (lambda <= 0.0f) {
468 /* magnify */
469 spu.mag_sample_texture4[unit](s, t, r, q, unit, 0, 0, colors);
470 }
471 else {
472 /* minify */
473 int level = (int) (lambda + 0.5f);
474 if (level > (int) spu.texture[unit].max_level)
475 level = spu.texture[unit].max_level;
476 spu.min_sample_texture4[unit](s, t, r, q, unit, level, 0, colors);
477 /* XXX to do: mipmap level interpolation */
478 }
479 }
480
481
482 /** XXX need a SIMD version of this */
483 static unsigned
484 choose_cube_face(float rx, float ry, float rz, float *newS, float *newT)
485 {
486 /*
487 major axis
488 direction target sc tc ma
489 ---------- ------------------------------- --- --- ---
490 +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx
491 -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx
492 +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry
493 -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry
494 +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz
495 -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz
496 */
497 const float arx = fabsf(rx);
498 const float ary = fabsf(ry);
499 const float arz = fabsf(rz);
500 unsigned face;
501 float sc, tc, ma;
502
503 if (arx > ary && arx > arz) {
504 if (rx >= 0.0F) {
505 face = PIPE_TEX_FACE_POS_X;
506 sc = -rz;
507 tc = -ry;
508 ma = arx;
509 }
510 else {
511 face = PIPE_TEX_FACE_NEG_X;
512 sc = rz;
513 tc = -ry;
514 ma = arx;
515 }
516 }
517 else if (ary > arx && ary > arz) {
518 if (ry >= 0.0F) {
519 face = PIPE_TEX_FACE_POS_Y;
520 sc = rx;
521 tc = rz;
522 ma = ary;
523 }
524 else {
525 face = PIPE_TEX_FACE_NEG_Y;
526 sc = rx;
527 tc = -rz;
528 ma = ary;
529 }
530 }
531 else {
532 if (rz > 0.0F) {
533 face = PIPE_TEX_FACE_POS_Z;
534 sc = rx;
535 tc = -ry;
536 ma = arz;
537 }
538 else {
539 face = PIPE_TEX_FACE_NEG_Z;
540 sc = -rx;
541 tc = -ry;
542 ma = arz;
543 }
544 }
545
546 *newS = (sc / ma + 1.0F) * 0.5F;
547 *newT = (tc / ma + 1.0F) * 0.5F;
548
549 return face;
550 }
551
552
553
554 void
555 sample_texture4_cube(vector float s, vector float t,
556 vector float r, vector float q,
557 uint unit, uint level, uint face_ignored,
558 vector float colors[4])
559 {
560 static const vector float zero = {0.0f, 0.0f, 0.0f, 0.0f};
561 uint p, faces[4];
562 float newS[4], newT[4];
563
564 /* Compute cube face referenced by the four sets of texcoords.
565 * XXX we should SIMD-ize this.
566 */
567 for (p = 0; p < 4; p++) {
568 float rx = spu_extract(s, p);
569 float ry = spu_extract(t, p);
570 float rz = spu_extract(r, p);
571 faces[p] = choose_cube_face(rx, ry, rz, &newS[p], &newT[p]);
572 }
573
574 if (faces[0] == faces[1] &&
575 faces[0] == faces[2] &&
576 faces[0] == faces[3]) {
577 /* GOOD! All four texcoords refer to the same cube face */
578 s = (vector float) {newS[0], newS[1], newS[2], newS[3]};
579 t = (vector float) {newT[0], newT[1], newT[2], newT[3]};
580 sample_texture4_nearest(s, t, zero, zero, unit, level, faces[0], colors);
581 }
582 else {
583 /* BAD! The four texcoords refer to different faces */
584 for (p = 0; p < 4; p++) {
585 vector float c[4];
586
587 sample_texture4_nearest(spu_splats(newS[p]), spu_splats(newT[p]),
588 zero, zero, unit, level, faces[p], c);
589
590 float red = spu_extract(c[0], p);
591 float green = spu_extract(c[1], p);
592 float blue = spu_extract(c[2], p);
593 float alpha = spu_extract(c[3], p);
594
595 colors[0] = spu_insert(red, colors[0], p);
596 colors[1] = spu_insert(green, colors[1], p);
597 colors[2] = spu_insert(blue, colors[2], p);
598 colors[3] = spu_insert(alpha, colors[3], p);
599 }
600 }
601 }