1 /**************************************************************************
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
31 #include "pipe/p_compiler.h"
33 #include "spu_texture.h"
35 #include "spu_colorpack.h"
36 #include "spu_dcache.h"
40 * Mark all tex cache entries as invalid.
43 invalidate_tex_cache(void)
46 for (lvl
= 0; lvl
< CELL_MAX_TEXTURE_LEVELS
; lvl
++) {
48 uint bytes
= 4 * spu
.texture
[unit
].level
[lvl
].width
49 * spu
.texture
[unit
].level
[lvl
].height
;
51 if (spu
.texture
[unit
].target
== PIPE_TEXTURE_CUBE
)
53 else if (spu
.texture
[unit
].target
== PIPE_TEXTURE_3D
)
54 bytes
*= spu
.texture
[unit
].level
[lvl
].depth
;
56 spu_dcache_mark_dirty((unsigned) spu
.texture
[unit
].level
[lvl
].start
, bytes
);
62 * Get four texels from locations (x[0], y[0]), (x[1], y[1]) ...
64 * NOTE: in the typical case of bilinear filtering, the four texels
65 * are in a 2x2 group so we could get by with just two dcache fetches
66 * (two side-by-side texels per fetch). But when bilinear filtering
67 * wraps around a texture edge, we'll probably need code like we have
69 * FURTHERMORE: since we're rasterizing a quad of 2x2 pixels at a time,
70 * it's quite likely that the four pixels in a quad will need some of the
71 * same texels. So look into doing texture fetches for four pixels at
75 get_four_texels(const struct spu_texture_level
*tlevel
, uint face
,
76 vec_int4 x
, vec_int4 y
,
79 unsigned texture_ea
= (uintptr_t) tlevel
->start
;
80 const vec_int4 tile_x
= spu_rlmask(x
, -5); /* tile_x = x / 32 */
81 const vec_int4 tile_y
= spu_rlmask(y
, -5); /* tile_y = y / 32 */
82 const qword offset_x
= si_andi((qword
) x
, 0x1f); /* offset_x = x & 0x1f */
83 const qword offset_y
= si_andi((qword
) y
, 0x1f); /* offset_y = y & 0x1f */
85 const qword tiles_per_row
= (qword
) spu_splats(tlevel
->tiles_per_row
);
86 const qword tile_size
= (qword
) spu_splats((unsigned) sizeof(tile_t
));
88 qword tile_offset
= si_mpya((qword
) tile_y
, tiles_per_row
, (qword
) tile_x
);
89 tile_offset
= si_mpy((qword
) tile_offset
, tile_size
);
91 qword texel_offset
= si_a(si_mpyui(offset_y
, 32), offset_x
);
92 texel_offset
= si_mpyui(texel_offset
, 4);
94 vec_uint4 offset
= (vec_uint4
) si_a(tile_offset
, texel_offset
);
96 texture_ea
= texture_ea
+ face
* tlevel
->bytes_per_image
;
98 spu_dcache_fetch_unaligned((qword
*) & texels
[0],
99 texture_ea
+ spu_extract(offset
, 0), 4);
100 spu_dcache_fetch_unaligned((qword
*) & texels
[1],
101 texture_ea
+ spu_extract(offset
, 1), 4);
102 spu_dcache_fetch_unaligned((qword
*) & texels
[2],
103 texture_ea
+ spu_extract(offset
, 2), 4);
104 spu_dcache_fetch_unaligned((qword
*) & texels
[3],
105 texture_ea
+ spu_extract(offset
, 3), 4);
109 /** clamp vec to [0, max] */
110 static INLINE vector
signed int
111 spu_clamp(vector
signed int vec
, vector
signed int max
)
113 static const vector
signed int zero
= {0,0,0,0};
114 vector
unsigned int c
;
115 c
= spu_cmpgt(vec
, zero
); /* c = vec > zero ? ~0 : 0 */
116 vec
= spu_sel(zero
, vec
, c
);
117 c
= spu_cmpgt(vec
, max
); /* c = vec > max ? ~0 : 0 */
118 vec
= spu_sel(vec
, max
, c
);
125 * Do nearest texture sampling for four pixels.
126 * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa).
129 sample_texture_2d_nearest(vector
float s
, vector
float t
,
130 uint unit
, uint level
, uint face
,
131 vector
float colors
[4])
133 const struct spu_texture_level
*tlevel
= &spu
.texture
[unit
].level
[level
];
134 vector
float ss
= spu_mul(s
, tlevel
->scale_s
);
135 vector
float tt
= spu_mul(t
, tlevel
->scale_t
);
136 vector
signed int is
= spu_convts(ss
, 0);
137 vector
signed int it
= spu_convts(tt
, 0);
140 /* PIPE_TEX_WRAP_REPEAT */
141 is
= spu_and(is
, tlevel
->mask_s
);
142 it
= spu_and(it
, tlevel
->mask_t
);
144 /* PIPE_TEX_WRAP_CLAMP */
145 is
= spu_clamp(is
, tlevel
->max_s
);
146 it
= spu_clamp(it
, tlevel
->max_t
);
148 get_four_texels(tlevel
, face
, is
, it
, texels
);
150 /* convert four packed ARGBA pixels to float RRRR,GGGG,BBBB,AAAA */
151 spu_unpack_A8R8G8B8_transpose4(texels
, colors
);
156 * Do bilinear texture sampling for four pixels.
157 * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa).
160 sample_texture_2d_bilinear(vector
float s
, vector
float t
,
161 uint unit
, uint level
, uint face
,
162 vector
float colors
[4])
164 const struct spu_texture_level
*tlevel
= &spu
.texture
[unit
].level
[level
];
165 static const vector
float half
= {-0.5f
, -0.5f
, -0.5f
, -0.5f
};
167 vector
float ss
= spu_madd(s
, tlevel
->scale_s
, half
);
168 vector
float tt
= spu_madd(t
, tlevel
->scale_t
, half
);
170 vector
signed int is0
= spu_convts(ss
, 0);
171 vector
signed int it0
= spu_convts(tt
, 0);
174 vector
signed int is1
= spu_add(is0
, 1);
175 vector
signed int it1
= spu_add(it0
, 1);
177 /* PIPE_TEX_WRAP_REPEAT */
178 is0
= spu_and(is0
, tlevel
->mask_s
);
179 it0
= spu_and(it0
, tlevel
->mask_t
);
180 is1
= spu_and(is1
, tlevel
->mask_s
);
181 it1
= spu_and(it1
, tlevel
->mask_t
);
183 /* PIPE_TEX_WRAP_CLAMP */
184 is0
= spu_clamp(is0
, tlevel
->max_s
);
185 it0
= spu_clamp(it0
, tlevel
->max_t
);
186 is1
= spu_clamp(is1
, tlevel
->max_s
);
187 it1
= spu_clamp(it1
, tlevel
->max_t
);
189 /* get packed int texels */
190 vector
unsigned int texels
[16];
191 get_four_texels(tlevel
, face
, is0
, it0
, texels
+ 0); /* upper-left */
192 get_four_texels(tlevel
, face
, is1
, it0
, texels
+ 4); /* upper-right */
193 get_four_texels(tlevel
, face
, is0
, it1
, texels
+ 8); /* lower-left */
194 get_four_texels(tlevel
, face
, is1
, it1
, texels
+ 12); /* lower-right */
196 /* convert packed int texels to float colors */
197 vector
float ftexels
[16];
198 spu_unpack_A8R8G8B8_transpose4(texels
+ 0, ftexels
+ 0);
199 spu_unpack_A8R8G8B8_transpose4(texels
+ 4, ftexels
+ 4);
200 spu_unpack_A8R8G8B8_transpose4(texels
+ 8, ftexels
+ 8);
201 spu_unpack_A8R8G8B8_transpose4(texels
+ 12, ftexels
+ 12);
203 /* Compute weighting factors in [0,1]
204 * Multiply texcoord by 1024, AND with 1023, convert back to float.
206 vector
float ss1024
= spu_mul(ss
, spu_splats(1024.0f
));
207 vector
signed int iss1024
= spu_convts(ss1024
, 0);
208 iss1024
= spu_and(iss1024
, 1023);
209 vector
float sWeights0
= spu_convtf(iss1024
, 10);
211 vector
float tt1024
= spu_mul(tt
, spu_splats(1024.0f
));
212 vector
signed int itt1024
= spu_convts(tt1024
, 0);
213 itt1024
= spu_and(itt1024
, 1023);
214 vector
float tWeights0
= spu_convtf(itt1024
, 10);
216 /* 1 - sWeight and 1 - tWeight */
217 vector
float sWeights1
= spu_sub(spu_splats(1.0f
), sWeights0
);
218 vector
float tWeights1
= spu_sub(spu_splats(1.0f
), tWeights0
);
220 /* reds, for four pixels */
221 ftexels
[ 0] = spu_mul(ftexels
[ 0], spu_mul(sWeights1
, tWeights1
)); /*ul*/
222 ftexels
[ 4] = spu_mul(ftexels
[ 4], spu_mul(sWeights0
, tWeights1
)); /*ur*/
223 ftexels
[ 8] = spu_mul(ftexels
[ 8], spu_mul(sWeights1
, tWeights0
)); /*ll*/
224 ftexels
[12] = spu_mul(ftexels
[12], spu_mul(sWeights0
, tWeights0
)); /*lr*/
225 colors
[0] = spu_add(spu_add(ftexels
[0], ftexels
[4]),
226 spu_add(ftexels
[8], ftexels
[12]));
228 /* greens, for four pixels */
229 ftexels
[ 1] = spu_mul(ftexels
[ 1], spu_mul(sWeights1
, tWeights1
)); /*ul*/
230 ftexels
[ 5] = spu_mul(ftexels
[ 5], spu_mul(sWeights0
, tWeights1
)); /*ur*/
231 ftexels
[ 9] = spu_mul(ftexels
[ 9], spu_mul(sWeights1
, tWeights0
)); /*ll*/
232 ftexels
[13] = spu_mul(ftexels
[13], spu_mul(sWeights0
, tWeights0
)); /*lr*/
233 colors
[1] = spu_add(spu_add(ftexels
[1], ftexels
[5]),
234 spu_add(ftexels
[9], ftexels
[13]));
236 /* blues, for four pixels */
237 ftexels
[ 2] = spu_mul(ftexels
[ 2], spu_mul(sWeights1
, tWeights1
)); /*ul*/
238 ftexels
[ 6] = spu_mul(ftexels
[ 6], spu_mul(sWeights0
, tWeights1
)); /*ur*/
239 ftexels
[10] = spu_mul(ftexels
[10], spu_mul(sWeights1
, tWeights0
)); /*ll*/
240 ftexels
[14] = spu_mul(ftexels
[14], spu_mul(sWeights0
, tWeights0
)); /*lr*/
241 colors
[2] = spu_add(spu_add(ftexels
[2], ftexels
[6]),
242 spu_add(ftexels
[10], ftexels
[14]));
244 /* alphas, for four pixels */
245 ftexels
[ 3] = spu_mul(ftexels
[ 3], spu_mul(sWeights1
, tWeights1
)); /*ul*/
246 ftexels
[ 7] = spu_mul(ftexels
[ 7], spu_mul(sWeights0
, tWeights1
)); /*ur*/
247 ftexels
[11] = spu_mul(ftexels
[11], spu_mul(sWeights1
, tWeights0
)); /*ll*/
248 ftexels
[15] = spu_mul(ftexels
[15], spu_mul(sWeights0
, tWeights0
)); /*lr*/
249 colors
[3] = spu_add(spu_add(ftexels
[3], ftexels
[7]),
250 spu_add(ftexels
[11], ftexels
[15]));
256 * Adapted from /opt/cell/sdk/usr/spu/include/transpose_matrix4x4.h
259 transpose(vector
unsigned int *mOut0
,
260 vector
unsigned int *mOut1
,
261 vector
unsigned int *mOut2
,
262 vector
unsigned int *mOut3
,
263 vector
unsigned int *mIn
)
265 vector
unsigned int abcd
, efgh
, ijkl
, mnop
; /* input vectors */
266 vector
unsigned int aeim
, bfjn
, cgko
, dhlp
; /* output vectors */
267 vector
unsigned int aibj
, ckdl
, emfn
, gohp
; /* intermediate vectors */
269 vector
unsigned char shufflehi
= ((vector
unsigned char) {
270 0x00, 0x01, 0x02, 0x03,
271 0x10, 0x11, 0x12, 0x13,
272 0x04, 0x05, 0x06, 0x07,
273 0x14, 0x15, 0x16, 0x17});
274 vector
unsigned char shufflelo
= ((vector
unsigned char) {
275 0x08, 0x09, 0x0A, 0x0B,
276 0x18, 0x19, 0x1A, 0x1B,
277 0x0C, 0x0D, 0x0E, 0x0F,
278 0x1C, 0x1D, 0x1E, 0x1F});
284 aibj
= spu_shuffle(abcd
, ijkl
, shufflehi
);
285 ckdl
= spu_shuffle(abcd
, ijkl
, shufflelo
);
286 emfn
= spu_shuffle(efgh
, mnop
, shufflehi
);
287 gohp
= spu_shuffle(efgh
, mnop
, shufflelo
);
289 aeim
= spu_shuffle(aibj
, emfn
, shufflehi
);
290 bfjn
= spu_shuffle(aibj
, emfn
, shufflelo
);
291 cgko
= spu_shuffle(ckdl
, gohp
, shufflehi
);
292 dhlp
= spu_shuffle(ckdl
, gohp
, shufflelo
);
302 * Bilinear filtering, using int instead of float arithmetic for computing
306 sample_texture_2d_bilinear_int(vector
float s
, vector
float t
,
307 uint unit
, uint level
, uint face
,
308 vector
float colors
[4])
310 const struct spu_texture_level
*tlevel
= &spu
.texture
[unit
].level
[level
];
311 static const vector
float half
= {-0.5f
, -0.5f
, -0.5f
, -0.5f
};
313 /* Scale texcoords by size of texture, and add half pixel bias */
314 vector
float ss
= spu_madd(s
, tlevel
->scale_s
, half
);
315 vector
float tt
= spu_madd(t
, tlevel
->scale_t
, half
);
317 /* convert float coords to fixed-pt coords with 7 fraction bits */
318 vector
signed int is
= spu_convts(ss
, 7); /* XXX really need floor() here */
319 vector
signed int it
= spu_convts(tt
, 7); /* XXX really need floor() here */
321 /* compute integer texel weights in [0, 127] */
322 vector
signed int sWeights0
= spu_and(is
, 127);
323 vector
signed int tWeights0
= spu_and(it
, 127);
324 vector
signed int sWeights1
= spu_sub(127, sWeights0
);
325 vector
signed int tWeights1
= spu_sub(127, tWeights0
);
327 /* texel coords: is0 = is / 128, it0 = is / 128 */
328 vector
signed int is0
= spu_rlmask(is
, -7);
329 vector
signed int it0
= spu_rlmask(it
, -7);
331 /* texel coords: i1 = is0 + 1, it1 = it0 + 1 */
332 vector
signed int is1
= spu_add(is0
, 1);
333 vector
signed int it1
= spu_add(it0
, 1);
335 /* PIPE_TEX_WRAP_REPEAT */
336 is0
= spu_and(is0
, tlevel
->mask_s
);
337 it0
= spu_and(it0
, tlevel
->mask_t
);
338 is1
= spu_and(is1
, tlevel
->mask_s
);
339 it1
= spu_and(it1
, tlevel
->mask_t
);
341 /* PIPE_TEX_WRAP_CLAMP */
342 is0
= spu_clamp(is0
, tlevel
->max_s
);
343 it0
= spu_clamp(it0
, tlevel
->max_t
);
344 is1
= spu_clamp(is1
, tlevel
->max_s
);
345 it1
= spu_clamp(it1
, tlevel
->max_t
);
347 /* get packed int texels */
348 vector
unsigned int texels
[16];
349 get_four_texels(tlevel
, face
, is0
, it0
, texels
+ 0); /* upper-left */
350 get_four_texels(tlevel
, face
, is1
, it0
, texels
+ 4); /* upper-right */
351 get_four_texels(tlevel
, face
, is0
, it1
, texels
+ 8); /* lower-left */
352 get_four_texels(tlevel
, face
, is1
, it1
, texels
+ 12); /* lower-right */
354 /* twiddle packed 32-bit BGRA pixels into RGBA as four unsigned ints */
356 static const unsigned char ZERO
= 0x80;
358 for (i
= 0; i
< 16; i
++) {
359 texels
[i
] = spu_shuffle(texels
[i
], texels
[i
],
360 ((vector
unsigned char) {
364 ZERO
, ZERO
, ZERO
, 0}));
368 /* convert RGBA,RGBA,RGBA,RGBA to RRRR,GGGG,BBBB,AAAA */
369 vector
unsigned int texel0
, texel1
, texel2
, texel3
, texel4
, texel5
, texel6
, texel7
,
370 texel8
, texel9
, texel10
, texel11
, texel12
, texel13
, texel14
, texel15
;
371 transpose(&texel0
, &texel1
, &texel2
, &texel3
, texels
+ 0);
372 transpose(&texel4
, &texel5
, &texel6
, &texel7
, texels
+ 4);
373 transpose(&texel8
, &texel9
, &texel10
, &texel11
, texels
+ 8);
374 transpose(&texel12
, &texel13
, &texel14
, &texel15
, texels
+ 12);
376 /* computed weighted colors */
377 vector
unsigned int c0
, c1
, c2
, c3
, cSum
;
380 c0
= (vector
unsigned int) si_mpy((qword
) texel0
, si_mpy((qword
) sWeights1
, (qword
) tWeights1
)); /*ul*/
381 c1
= (vector
unsigned int) si_mpy((qword
) texel4
, si_mpy((qword
) sWeights0
, (qword
) tWeights1
)); /*ur*/
382 c2
= (vector
unsigned int) si_mpy((qword
) texel8
, si_mpy((qword
) sWeights1
, (qword
) tWeights0
)); /*ll*/
383 c3
= (vector
unsigned int) si_mpy((qword
) texel12
, si_mpy((qword
) sWeights0
, (qword
) tWeights0
)); /*lr*/
384 cSum
= spu_add(spu_add(c0
, c1
), spu_add(c2
, c3
));
385 colors
[0] = spu_convtf(cSum
, 22);
388 c0
= (vector
unsigned int) si_mpy((qword
) texel1
, si_mpy((qword
) sWeights1
, (qword
) tWeights1
)); /*ul*/
389 c1
= (vector
unsigned int) si_mpy((qword
) texel5
, si_mpy((qword
) sWeights0
, (qword
) tWeights1
)); /*ur*/
390 c2
= (vector
unsigned int) si_mpy((qword
) texel9
, si_mpy((qword
) sWeights1
, (qword
) tWeights0
)); /*ll*/
391 c3
= (vector
unsigned int) si_mpy((qword
) texel13
, si_mpy((qword
) sWeights0
, (qword
) tWeights0
)); /*lr*/
392 cSum
= spu_add(spu_add(c0
, c1
), spu_add(c2
, c3
));
393 colors
[1] = spu_convtf(cSum
, 22);
396 c0
= (vector
unsigned int) si_mpy((qword
) texel2
, si_mpy((qword
) sWeights1
, (qword
) tWeights1
)); /*ul*/
397 c1
= (vector
unsigned int) si_mpy((qword
) texel6
, si_mpy((qword
) sWeights0
, (qword
) tWeights1
)); /*ur*/
398 c2
= (vector
unsigned int) si_mpy((qword
) texel10
, si_mpy((qword
) sWeights1
, (qword
) tWeights0
)); /*ll*/
399 c3
= (vector
unsigned int) si_mpy((qword
) texel14
, si_mpy((qword
) sWeights0
, (qword
) tWeights0
)); /*lr*/
400 cSum
= spu_add(spu_add(c0
, c1
), spu_add(c2
, c3
));
401 colors
[2] = spu_convtf(cSum
, 22);
404 c0
= (vector
unsigned int) si_mpy((qword
) texel3
, si_mpy((qword
) sWeights1
, (qword
) tWeights1
)); /*ul*/
405 c1
= (vector
unsigned int) si_mpy((qword
) texel7
, si_mpy((qword
) sWeights0
, (qword
) tWeights1
)); /*ur*/
406 c2
= (vector
unsigned int) si_mpy((qword
) texel11
, si_mpy((qword
) sWeights1
, (qword
) tWeights0
)); /*ll*/
407 c3
= (vector
unsigned int) si_mpy((qword
) texel15
, si_mpy((qword
) sWeights0
, (qword
) tWeights0
)); /*lr*/
408 cSum
= spu_add(spu_add(c0
, c1
), spu_add(c2
, c3
));
409 colors
[3] = spu_convtf(cSum
, 22);
415 * Compute level of detail factor from texcoords.
418 compute_lambda_2d(uint unit
, vector
float s
, vector
float t
)
421 float width
= spu
.texture
[unit
].level
[baseLevel
].width
;
422 float height
= spu
.texture
[unit
].level
[baseLevel
].width
;
423 float dsdx
= width
* (spu_extract(s
, 1) - spu_extract(s
, 0));
424 float dsdy
= width
* (spu_extract(s
, 2) - spu_extract(s
, 0));
425 float dtdx
= height
* (spu_extract(t
, 1) - spu_extract(t
, 0));
426 float dtdy
= height
* (spu_extract(t
, 2) - spu_extract(t
, 0));
429 float x
= dsdx
* dsdx
+ dtdx
* dtdx
;
430 float y
= dsdy
* dsdy
+ dtdy
* dtdy
;
431 float rho
= x
> y
? x
: y
;
439 float rho
= (dsdx
+ dsdy
+ dtdx
+ dtdy
) * 0.5;
441 float lambda
= logf(rho
) * 1.442695f
; /* compute logbase2(rho) */
447 * Blend two sets of colors according to weight.
450 blend_colors(vector
float c0
[4], const vector
float c1
[4], float weight
)
452 vector
float t
= spu_splats(weight
);
453 vector
float dc0
= spu_sub(c1
[0], c0
[0]);
454 vector
float dc1
= spu_sub(c1
[1], c0
[1]);
455 vector
float dc2
= spu_sub(c1
[2], c0
[2]);
456 vector
float dc3
= spu_sub(c1
[3], c0
[3]);
457 c0
[0] = spu_madd(dc0
, t
, c0
[0]);
458 c0
[1] = spu_madd(dc1
, t
, c0
[1]);
459 c0
[2] = spu_madd(dc2
, t
, c0
[2]);
460 c0
[3] = spu_madd(dc3
, t
, c0
[3]);
465 * Texture sampling with level of detail selection and possibly mipmap
469 sample_texture_2d_lod(vector
float s
, vector
float t
,
470 uint unit
, uint level_ignored
, uint face
,
471 vector
float colors
[4])
474 * Note that we're computing a lambda/lod here that's used for all
475 * four pixels in the quad.
477 float lambda
= compute_lambda_2d(unit
, s
, t
);
480 (void) level_ignored
;
483 lambda
+= spu
.sampler
[unit
].lod_bias
;
486 if (lambda
< spu
.sampler
[unit
].min_lod
)
487 lambda
= spu
.sampler
[unit
].min_lod
;
488 else if (lambda
> spu
.sampler
[unit
].max_lod
)
489 lambda
= spu
.sampler
[unit
].max_lod
;
491 if (lambda
<= 0.0f
) {
493 spu
.mag_sample_texture_2d
[unit
](s
, t
, unit
, 0, face
, colors
);
497 if (spu
.sampler
[unit
].min_img_filter
== PIPE_TEX_FILTER_LINEAR
) {
498 /* sample two mipmap levels and interpolate */
499 int level
= (int) lambda
;
500 if (level
> (int) spu
.texture
[unit
].max_level
)
501 level
= spu
.texture
[unit
].max_level
;
502 spu
.min_sample_texture_2d
[unit
](s
, t
, unit
, level
, face
, colors
);
503 if (spu
.sampler
[unit
].min_img_filter
== PIPE_TEX_FILTER_LINEAR
) {
504 /* sample second mipmap level */
505 float weight
= lambda
- (float) level
;
507 if (level
<= (int) spu
.texture
[unit
].max_level
) {
508 vector
float colors2
[4];
509 spu
.min_sample_texture_2d
[unit
](s
, t
, unit
, level
, face
, colors2
);
510 blend_colors(colors
, colors2
, weight
);
515 /* sample one mipmap level */
516 int level
= (int) (lambda
+ 0.5f
);
517 if (level
> (int) spu
.texture
[unit
].max_level
)
518 level
= spu
.texture
[unit
].max_level
;
519 spu
.min_sample_texture_2d
[unit
](s
, t
, unit
, level
, face
, colors
);
525 /** XXX need a SIMD version of this */
527 choose_cube_face(float rx
, float ry
, float rz
, float *newS
, float *newT
)
531 direction target sc tc ma
532 ---------- ------------------------------- --- --- ---
533 +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx
534 -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx
535 +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry
536 -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry
537 +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz
538 -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz
540 const float arx
= fabsf(rx
);
541 const float ary
= fabsf(ry
);
542 const float arz
= fabsf(rz
);
546 if (arx
> ary
&& arx
> arz
) {
548 face
= PIPE_TEX_FACE_POS_X
;
554 face
= PIPE_TEX_FACE_NEG_X
;
560 else if (ary
> arx
&& ary
> arz
) {
562 face
= PIPE_TEX_FACE_POS_Y
;
568 face
= PIPE_TEX_FACE_NEG_Y
;
576 face
= PIPE_TEX_FACE_POS_Z
;
582 face
= PIPE_TEX_FACE_NEG_Z
;
589 *newS
= (sc
/ ma
+ 1.0F
) * 0.5F
;
590 *newT
= (tc
/ ma
+ 1.0F
) * 0.5F
;
598 sample_texture_cube(vector
float s
, vector
float t
, vector
float r
,
599 uint unit
, vector
float colors
[4])
601 uint p
, faces
[4], level
= 0;
602 float newS
[4], newT
[4];
604 /* Compute cube faces referenced by the four sets of texcoords.
605 * XXX we should SIMD-ize this.
607 for (p
= 0; p
< 4; p
++) {
608 float rx
= spu_extract(s
, p
);
609 float ry
= spu_extract(t
, p
);
610 float rz
= spu_extract(r
, p
);
611 faces
[p
] = choose_cube_face(rx
, ry
, rz
, &newS
[p
], &newT
[p
]);
614 if (faces
[0] == faces
[1] &&
615 faces
[0] == faces
[2] &&
616 faces
[0] == faces
[3]) {
617 /* GOOD! All four texcoords refer to the same cube face */
618 s
= (vector
float) {newS
[0], newS
[1], newS
[2], newS
[3]};
619 t
= (vector
float) {newT
[0], newT
[1], newT
[2], newT
[3]};
620 spu
.sample_texture_2d
[unit
](s
, t
, unit
, level
, faces
[0], colors
);
623 /* BAD! The four texcoords refer to different faces */
624 for (p
= 0; p
< 4; p
++) {
627 spu
.sample_texture_2d
[unit
](spu_splats(newS
[p
]), spu_splats(newT
[p
]),
628 unit
, level
, faces
[p
], c
);
630 float red
= spu_extract(c
[0], p
);
631 float green
= spu_extract(c
[1], p
);
632 float blue
= spu_extract(c
[2], p
);
633 float alpha
= spu_extract(c
[3], p
);
635 colors
[0] = spu_insert(red
, colors
[0], p
);
636 colors
[1] = spu_insert(green
, colors
[1], p
);
637 colors
[2] = spu_insert(blue
, colors
[2], p
);
638 colors
[3] = spu_insert(alpha
, colors
[3], p
);