83cf7dc39423388719bde31b3ccab122a51007f3
1 /**************************************************************************
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
31 #include "pipe/p_compiler.h"
33 #include "spu_texture.h"
35 #include "spu_colorpack.h"
36 #include "spu_dcache.h"
40 * Mark all tex cache entries as invalid.
/*
 * Walk every mipmap level index below CELL_MAX_TEXTURE_LEVELS and hand the
 * level's start address and byte size to spu_dcache_mark_dirty().
 * NOTE(review): the declarations of 'lvl' and 'unit' are not visible in this
 * excerpt -- confirm which texture unit(s) this actually covers.
 */
43 invalidate_tex_cache(void)
46 for (lvl
= 0; lvl
< CELL_MAX_TEXTURE_LEVELS
; lvl
++) {
/* level size in bytes: 4 * width * height (presumably 4 bytes per texel) */
48 uint bytes
= 4 * spu
.texture
[unit
].level
[lvl
].width
49 * spu
.texture
[unit
].level
[lvl
].height
;
51 spu_dcache_mark_dirty((unsigned) spu
.texture
[unit
].level
[lvl
].start
, bytes
);
57 * Get four texels from locations (x[0], y[0]), (x[1], y[1]) ...
59 * NOTE: in the typical case of bilinear filtering, the four texels
60 * are in a 2x2 group so we could get by with just two dcache fetches
61 * (two side-by-side texels per fetch). But when bilinear filtering
62 * wraps around a texture edge, we'll probably need code like we have
64 * FURTHERMORE: since we're rasterizing a quad of 2x2 pixels at a time,
65 * it's quite likely that the four pixels in a quad will need some of the
66 * same texels. So look into doing texture fetches for four pixels at
/*
 * Fetch one 4-byte texel for each of the four (x, y) lanes from mipmap
 * level 'level' of texture unit 'unit', storing them in texels[0..3].
 *
 * The byte offset of each texel from the level's start address is
 *    (tile_y * tiles_per_row + tile_x) * sizeof(tile_t)
 *  + ((y & 31) * 32 + (x & 31)) * 4
 * i.e. the texture is addressed as 32x32-texel tiles.
 *
 * NOTE(review): the declaration of 'texels' is not visible in this excerpt.
 * NOTE(review): si_mpy/si_mpya/si_mpyui are presumably 16-bit multiplies, so
 * this assumes the operands fit in 16 bits -- confirm against the SPU ISA.
 */
70 get_four_texels(uint unit
, uint level
, vec_uint4 x
, vec_uint4 y
,
73 const struct spu_texture_level
*tlevel
= &spu
.texture
[unit
].level
[level
];
74 const unsigned texture_ea
= (uintptr_t) tlevel
->start
;
75 const vec_uint4 tile_x
= spu_rlmask(x
, -5); /* tile_x = x / 32 */
76 const vec_uint4 tile_y
= spu_rlmask(y
, -5); /* tile_y = y / 32 */
77 const qword offset_x
= si_andi((qword
) x
, 0x1f); /* offset_x = x & 0x1f */
78 const qword offset_y
= si_andi((qword
) y
, 0x1f); /* offset_y = y & 0x1f */
80 const qword tiles_per_row
= (qword
) spu_splats(tlevel
->tiles_per_row
);
81 const qword tile_size
= (qword
) spu_splats((unsigned) sizeof(tile_t
));
/* linear tile index (tile_y * tiles_per_row + tile_x), then scaled to bytes */
83 qword tile_offset
= si_mpya((qword
) tile_y
, tiles_per_row
, (qword
) tile_x
);
84 tile_offset
= si_mpy((qword
) tile_offset
, tile_size
);
/* texel index inside the tile (32 texels per tile row), then scaled by 4 bytes */
86 qword texel_offset
= si_a(si_mpyui(offset_y
, 32), offset_x
);
87 texel_offset
= si_mpyui(texel_offset
, 4);
89 vec_uint4 offset
= (vec_uint4
) si_a(tile_offset
, texel_offset
);
/* one 4-byte data-cache fetch per lane */
91 spu_dcache_fetch_unaligned((qword
*) & texels
[0],
92 texture_ea
+ spu_extract(offset
, 0), 4);
93 spu_dcache_fetch_unaligned((qword
*) & texels
[1],
94 texture_ea
+ spu_extract(offset
, 1), 4);
95 spu_dcache_fetch_unaligned((qword
*) & texels
[2],
96 texture_ea
+ spu_extract(offset
, 2), 4);
97 spu_dcache_fetch_unaligned((qword
*) & texels
[3],
98 texture_ea
+ spu_extract(offset
, 3), 4);
104 * Do nearest texture sampling for four pixels.
105 * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa).
/*
 * Nearest-texel sampling for four pixels at once.
 * s and t are scaled by the level's width4/height4 vectors, converted to
 * unsigned integers, wrapped with the level's size masks, and the four
 * resulting texels are fetched with get_four_texels() and unpacked into
 * 'colors'.  r and q are not referenced in the visible code.
 * NOTE(review): the declaration of 'texels' is not visible in this excerpt.
 */
108 sample_texture4_nearest(vector
float s
, vector
float t
,
109 vector
float r
, vector
float q
,
110 uint unit
, uint level
, vector
float colors
[4])
112 vector
float ss
= spu_mul(s
, spu
.texture
[unit
].level
[level
].width4
);
113 vector
float tt
= spu_mul(t
, spu
.texture
[unit
].level
[level
].height4
);
/* float -> unsigned int texel coordinates (scale argument 0) */
114 vector
unsigned int is
= spu_convtu(ss
, 0);
115 vector
unsigned int it
= spu_convtu(tt
, 0);
118 /* PIPE_TEX_WRAP_REPEAT */
119 is
= spu_and(is
, spu
.texture
[unit
].level
[level
].tex_size_x_mask
);
120 it
= spu_and(it
, spu
.texture
[unit
].level
[level
].tex_size_y_mask
);
122 get_four_texels(unit
, level
, is
, it
, texels
);
124 /* convert four packed A8R8G8B8 pixels to float RRRR,GGGG,BBBB,AAAA */
125 spu_unpack_A8R8G8B8_transpose4(texels
, colors
);
130 * Do bilinear texture sampling for four pixels.
131 * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa).
/*
 * Bilinear sampling for four pixels at once, using float arithmetic.
 * Steps visible here:
 *  - scale s/t by width4/height4 and add a -0.5 bias (spu_madd);
 *  - convert to integer coords (is0, it0) and form the +1 neighbours
 *    (is1, it1); wrap all four with the level's size masks;
 *  - fetch the 2x2 neighbourhood for each pixel with four
 *    get_four_texels() calls and unpack to float SOA form;
 *  - weight each texel by the products of sWeights0/1 and tWeights0/1 and
 *    sum per channel into colors[0..3].
 * r and q are not referenced in the visible code.
 * NOTE(review): spu_convts presumably truncates toward zero, so coordinates
 * in (-1, 0) would give is0/it0 == 0 rather than -1 -- confirm.
 */
134 sample_texture4_bilinear(vector
float s
, vector
float t
,
135 vector
float r
, vector
float q
,
136 uint unit
, uint level
, vector
float colors
[4])
138 vector
float ss
= spu_madd(s
, spu
.texture
[unit
].level
[level
].width4
, spu_splats(-0.5f
));
139 vector
float tt
= spu_madd(t
, spu
.texture
[unit
].level
[level
].height4
, spu_splats(-0.5f
));
/* integer coordinates of the first (upper-left) texel */
141 vector
unsigned int is0
= (vector
unsigned int) spu_convts(ss
, 0);
142 vector
unsigned int it0
= (vector
unsigned int) spu_convts(tt
, 0);
/* coordinates of the neighbouring column / row */
145 vector
unsigned int is1
= spu_add(is0
, 1);
146 vector
unsigned int it1
= spu_add(it0
, 1);
148 /* PIPE_TEX_WRAP_REPEAT */
149 is0
= spu_and(is0
, spu
.texture
[unit
].level
[level
].tex_size_x_mask
);
150 it0
= spu_and(it0
, spu
.texture
[unit
].level
[level
].tex_size_y_mask
);
151 is1
= spu_and(is1
, spu
.texture
[unit
].level
[level
].tex_size_x_mask
);
152 it1
= spu_and(it1
, spu
.texture
[unit
].level
[level
].tex_size_y_mask
);
154 /* get packed int texels */
155 vector
unsigned int texels
[16];
156 get_four_texels(unit
, level
, is0
, it0
, texels
+ 0); /* upper-left */
157 get_four_texels(unit
, level
, is1
, it0
, texels
+ 4); /* upper-right */
158 get_four_texels(unit
, level
, is0
, it1
, texels
+ 8); /* lower-left */
159 get_four_texels(unit
, level
, is1
, it1
, texels
+ 12); /* lower-right */
161 /* XXX possibly rework following code to compute the weighted sample
162 * colors with integer arithmetic for fewer int->float conversions.
165 /* convert packed int texels to float colors */
166 vector
float ftexels
[16];
167 spu_unpack_A8R8G8B8_transpose4(texels
+ 0, ftexels
+ 0);
168 spu_unpack_A8R8G8B8_transpose4(texels
+ 4, ftexels
+ 4);
169 spu_unpack_A8R8G8B8_transpose4(texels
+ 8, ftexels
+ 8);
170 spu_unpack_A8R8G8B8_transpose4(texels
+ 12, ftexels
+ 12);
172 /* Compute weighting factors in [0,1]
173 * Multiply texcoord by 1024, AND with 1023, convert back to float.
175 vector
float ss1024
= spu_mul(ss
, spu_splats(1024.0f
));
176 vector
signed int iss1024
= spu_convts(ss1024
, 0);
177 iss1024
= spu_and(iss1024
, 1023);
178 vector
float sWeights0
= spu_convtf(iss1024
, 10);
180 vector
float tt1024
= spu_mul(tt
, spu_splats(1024.0f
));
181 vector
signed int itt1024
= spu_convts(tt1024
, 0);
182 itt1024
= spu_and(itt1024
, 1023);
183 vector
float tWeights0
= spu_convtf(itt1024
, 10);
185 /* 1 - sWeight and 1 - tWeight */
186 vector
float sWeights1
= spu_sub(spu_splats(1.0f
), sWeights0
);
187 vector
float tWeights1
= spu_sub(spu_splats(1.0f
), tWeights0
);
189 /* reds, for four pixels */
190 ftexels
[ 0] = spu_mul(ftexels
[ 0], spu_mul(sWeights1
, tWeights1
)); /*ul*/
191 ftexels
[ 4] = spu_mul(ftexels
[ 4], spu_mul(sWeights0
, tWeights1
)); /*ur*/
192 ftexels
[ 8] = spu_mul(ftexels
[ 8], spu_mul(sWeights1
, tWeights0
)); /*ll*/
193 ftexels
[12] = spu_mul(ftexels
[12], spu_mul(sWeights0
, tWeights0
)); /*lr*/
194 colors
[0] = spu_add(spu_add(ftexels
[0], ftexels
[4]),
195 spu_add(ftexels
[8], ftexels
[12]));
197 /* greens, for four pixels */
198 ftexels
[ 1] = spu_mul(ftexels
[ 1], spu_mul(sWeights1
, tWeights1
)); /*ul*/
199 ftexels
[ 5] = spu_mul(ftexels
[ 5], spu_mul(sWeights0
, tWeights1
)); /*ur*/
200 ftexels
[ 9] = spu_mul(ftexels
[ 9], spu_mul(sWeights1
, tWeights0
)); /*ll*/
201 ftexels
[13] = spu_mul(ftexels
[13], spu_mul(sWeights0
, tWeights0
)); /*lr*/
202 colors
[1] = spu_add(spu_add(ftexels
[1], ftexels
[5]),
203 spu_add(ftexels
[9], ftexels
[13]));
205 /* blues, for four pixels */
206 ftexels
[ 2] = spu_mul(ftexels
[ 2], spu_mul(sWeights1
, tWeights1
)); /*ul*/
207 ftexels
[ 6] = spu_mul(ftexels
[ 6], spu_mul(sWeights0
, tWeights1
)); /*ur*/
208 ftexels
[10] = spu_mul(ftexels
[10], spu_mul(sWeights1
, tWeights0
)); /*ll*/
209 ftexels
[14] = spu_mul(ftexels
[14], spu_mul(sWeights0
, tWeights0
)); /*lr*/
210 colors
[2] = spu_add(spu_add(ftexels
[2], ftexels
[6]),
211 spu_add(ftexels
[10], ftexels
[14]));
213 /* alphas, for four pixels */
214 ftexels
[ 3] = spu_mul(ftexels
[ 3], spu_mul(sWeights1
, tWeights1
)); /*ul*/
215 ftexels
[ 7] = spu_mul(ftexels
[ 7], spu_mul(sWeights0
, tWeights1
)); /*ur*/
216 ftexels
[11] = spu_mul(ftexels
[11], spu_mul(sWeights1
, tWeights0
)); /*ll*/
217 ftexels
[15] = spu_mul(ftexels
[15], spu_mul(sWeights0
, tWeights0
)); /*lr*/
218 colors
[3] = spu_add(spu_add(ftexels
[3], ftexels
[7]),
219 spu_add(ftexels
[11], ftexels
[15]));
225 * Adapted from /opt/cell/sdk/usr/spu/include/transpose_matrix4x4.h
/*
 * 4x4 transpose of 32-bit words using two rounds of spu_shuffle().
 * With input rows named abcd, efgh, ijkl, mnop, the first round interleaves
 * row 0 with row 2 and row 1 with row 3; the second round interleaves those
 * intermediates to give the columns aeim, bfjn, cgko, dhlp.
 * NOTE(review): the loads from mIn and the stores through mOut0..mOut3 are
 * not visible in this excerpt.
 */
228 transpose(vector
unsigned int *mOut0
,
229 vector
unsigned int *mOut1
,
230 vector
unsigned int *mOut2
,
231 vector
unsigned int *mOut3
,
232 vector
unsigned int *mIn
)
234 vector
unsigned int abcd
, efgh
, ijkl
, mnop
; /* input vectors */
235 vector
unsigned int aeim
, bfjn
, cgko
, dhlp
; /* output vectors */
236 vector
unsigned int aibj
, ckdl
, emfn
, gohp
; /* intermediate vectors */
/* pattern: words 0 and 1 of the first operand interleaved with words 0 and 1
 * of the second operand (pattern bytes 0x10..0x17 select the second operand) */
238 vector
unsigned char shufflehi
= ((vector
unsigned char) {
239 0x00, 0x01, 0x02, 0x03,
240 0x10, 0x11, 0x12, 0x13,
241 0x04, 0x05, 0x06, 0x07,
242 0x14, 0x15, 0x16, 0x17});
/* same interleave, but for words 2 and 3 of each operand */
243 vector
unsigned char shufflelo
= ((vector
unsigned char) {
244 0x08, 0x09, 0x0A, 0x0B,
245 0x18, 0x19, 0x1A, 0x1B,
246 0x0C, 0x0D, 0x0E, 0x0F,
247 0x1C, 0x1D, 0x1E, 0x1F});
/* round 1: interleave rows 0/2 and rows 1/3 */
253 aibj
= spu_shuffle(abcd
, ijkl
, shufflehi
);
254 ckdl
= spu_shuffle(abcd
, ijkl
, shufflelo
);
255 emfn
= spu_shuffle(efgh
, mnop
, shufflehi
);
256 gohp
= spu_shuffle(efgh
, mnop
, shufflelo
);
/* round 2: interleave the intermediates to obtain the transposed columns */
258 aeim
= spu_shuffle(aibj
, emfn
, shufflehi
);
259 bfjn
= spu_shuffle(aibj
, emfn
, shufflelo
);
260 cgko
= spu_shuffle(ckdl
, gohp
, shufflehi
);
261 dhlp
= spu_shuffle(ckdl
, gohp
, shufflelo
);
271 * Bilinear filtering, using int instead of float arithmetic
/*
 * Bilinear sampling for four pixels at once, with the texel weighting done
 * in integer arithmetic.
 * Steps visible here:
 *  - scale s/t by width4/height4 and add a -0.5 bias;
 *  - convert to fixed point with 8 fraction bits; the low 8 bits become the
 *    weights, the remaining bits the texel coordinates;
 *  - wrap the coordinates with the level's size masks and fetch the 2x2
 *    neighbourhood for each pixel with four get_four_texels() calls;
 *  - shuffle and transpose the packed texels into per-channel vectors;
 *  - for each channel, multiply each texel by the product of its two
 *    weights, sum the four corners, and convert to float with
 *    spu_convtf(cSum, 24).
 * r and q are not referenced in the visible code.
 * NOTE(review): the declaration of the loop index 'i' and three rows of the
 * byte-shuffle pattern are not visible in this excerpt.
 */
274 sample_texture4_bilinear_2(vector
float s
, vector
float t
,
275 vector
float r
, vector
float q
,
276 uint unit
, uint level
, vector
float colors
[4])
278 static const vector
float half
= {-0.5f
, -0.5f
, -0.5f
, -0.5f
};
279 /* Scale texcoords by size of texture, and add half pixel bias */
280 vector
float ss
= spu_madd(s
, spu
.texture
[unit
].level
[level
].width4
, half
);
281 vector
float tt
= spu_madd(t
, spu
.texture
[unit
].level
[level
].height4
, half
);
283 /* convert float coords to fixed-pt coords with 8 fraction bits */
284 vector
unsigned int is
= (vector
unsigned int) spu_convts(ss
, 8);
285 vector
unsigned int it
= (vector
unsigned int) spu_convts(tt
, 8);
287 /* compute integer texel weights in [0, 255] */
288 vector
signed int sWeights0
= spu_and((vector
signed int) is
, 255);
289 vector
signed int tWeights0
= spu_and((vector
signed int) it
, 255);
290 vector
signed int sWeights1
= spu_sub(255, sWeights0
);
291 vector
signed int tWeights1
= spu_sub(255, tWeights0
);
293 /* texel coords: is0 = is / 256, it0 = it / 256 */
294 vector
unsigned int is0
= spu_rlmask(is
, -8);
295 vector
unsigned int it0
= spu_rlmask(it
, -8);
297 /* texel coords: is1 = is0 + 1, it1 = it0 + 1 */
298 vector
unsigned int is1
= spu_add(is0
, 1);
299 vector
unsigned int it1
= spu_add(it0
, 1);
301 /* PIPE_TEX_WRAP_REPEAT */
302 is0
= spu_and(is0
, spu
.texture
[unit
].level
[level
].tex_size_x_mask
);
303 it0
= spu_and(it0
, spu
.texture
[unit
].level
[level
].tex_size_y_mask
);
304 is1
= spu_and(is1
, spu
.texture
[unit
].level
[level
].tex_size_x_mask
);
305 it1
= spu_and(it1
, spu
.texture
[unit
].level
[level
].tex_size_y_mask
);
307 /* get packed int texels */
308 vector
unsigned int texels
[16];
309 get_four_texels(unit
, level
, is0
, it0
, texels
+ 0); /* upper-left */
310 get_four_texels(unit
, level
, is1
, it0
, texels
+ 4); /* upper-right */
311 get_four_texels(unit
, level
, is0
, it1
, texels
+ 8); /* lower-left */
312 get_four_texels(unit
, level
, is1
, it1
, texels
+ 12); /* lower-right */
314 /* twiddle packed 32-bit BGRA pixels into RGBA as four unsigned ints */
/* NOTE(review): 0x80 as a shuffle pattern byte presumably yields a zero byte */
316 static const unsigned char ZERO
= 0x80;
318 for (i
= 0; i
< 16; i
++) {
319 texels
[i
] = spu_shuffle(texels
[i
], texels
[i
],
320 ((vector
unsigned char) {
324 ZERO
, ZERO
, ZERO
, 0}));
328 /* convert RGBA,RGBA,RGBA,RGBA to RRRR,GGGG,BBBB,AAAA */
329 vector
unsigned int texel0
, texel1
, texel2
, texel3
, texel4
, texel5
, texel6
, texel7
,
330 texel8
, texel9
, texel10
, texel11
, texel12
, texel13
, texel14
, texel15
;
331 transpose(&texel0
, &texel1
, &texel2
, &texel3
, texels
+ 0);
332 transpose(&texel4
, &texel5
, &texel6
, &texel7
, texels
+ 4);
333 transpose(&texel8
, &texel9
, &texel10
, &texel11
, texels
+ 8);
334 transpose(&texel12
, &texel13
, &texel14
, &texel15
, texels
+ 12);
336 /* compute weighted colors */
/* NOTE(review): each weight pair sums to 255 (not 256), so the four weight
 * products sum to 255*255.  If texel channels are 8-bit, cSum peaks at
 * 255*255*255, slightly below 2^24, and spu_convtf(cSum, 24) would then give
 * about 0.988 rather than 1.0 for a fully saturated channel -- confirm
 * whether this is acceptable. */
337 vector
unsigned int c0
, c1
, c2
, c3
, cSum
;
/* channel 0 of the transposed texels -> colors[0] */
340 c0
= (vector
unsigned int) si_mpyu((qword
) texel0
, si_mpyu((qword
) sWeights1
, (qword
) tWeights1
)); /*ul*/
341 c1
= (vector
unsigned int) si_mpyu((qword
) texel4
, si_mpyu((qword
) sWeights0
, (qword
) tWeights1
)); /*ur*/
342 c2
= (vector
unsigned int) si_mpyu((qword
) texel8
, si_mpyu((qword
) sWeights1
, (qword
) tWeights0
)); /*ll*/
343 c3
= (vector
unsigned int) si_mpyu((qword
) texel12
, si_mpyu((qword
) sWeights0
, (qword
) tWeights0
)); /*lr*/
344 cSum
= spu_add(spu_add(c0
, c1
), spu_add(c2
, c3
));
345 colors
[0] = spu_convtf(cSum
, 24);
/* channel 1 of the transposed texels -> colors[1] */
348 c0
= (vector
unsigned int) si_mpyu((qword
) texel1
, si_mpyu((qword
) sWeights1
, (qword
) tWeights1
)); /*ul*/
349 c1
= (vector
unsigned int) si_mpyu((qword
) texel5
, si_mpyu((qword
) sWeights0
, (qword
) tWeights1
)); /*ur*/
350 c2
= (vector
unsigned int) si_mpyu((qword
) texel9
, si_mpyu((qword
) sWeights1
, (qword
) tWeights0
)); /*ll*/
351 c3
= (vector
unsigned int) si_mpyu((qword
) texel13
, si_mpyu((qword
) sWeights0
, (qword
) tWeights0
)); /*lr*/
352 cSum
= spu_add(spu_add(c0
, c1
), spu_add(c2
, c3
));
353 colors
[1] = spu_convtf(cSum
, 24);
/* channel 2 of the transposed texels -> colors[2] */
356 c0
= (vector
unsigned int) si_mpyu((qword
) texel2
, si_mpyu((qword
) sWeights1
, (qword
) tWeights1
)); /*ul*/
357 c1
= (vector
unsigned int) si_mpyu((qword
) texel6
, si_mpyu((qword
) sWeights0
, (qword
) tWeights1
)); /*ur*/
358 c2
= (vector
unsigned int) si_mpyu((qword
) texel10
, si_mpyu((qword
) sWeights1
, (qword
) tWeights0
)); /*ll*/
359 c3
= (vector
unsigned int) si_mpyu((qword
) texel14
, si_mpyu((qword
) sWeights0
, (qword
) tWeights0
)); /*lr*/
360 cSum
= spu_add(spu_add(c0
, c1
), spu_add(c2
, c3
));
361 colors
[2] = spu_convtf(cSum
, 24);
/* channel 3 of the transposed texels -> colors[3] */
364 c0
= (vector
unsigned int) si_mpyu((qword
) texel3
, si_mpyu((qword
) sWeights1
, (qword
) tWeights1
)); /*ul*/
365 c1
= (vector
unsigned int) si_mpyu((qword
) texel7
, si_mpyu((qword
) sWeights0
, (qword
) tWeights1
)); /*ur*/
366 c2
= (vector
unsigned int) si_mpyu((qword
) texel11
, si_mpyu((qword
) sWeights1
, (qword
) tWeights0
)); /*ll*/
367 c3
= (vector
unsigned int) si_mpyu((qword
) texel15
, si_mpyu((qword
) sWeights0
, (qword
) tWeights0
)); /*lr*/
368 cSum
= spu_add(spu_add(c0
, c1
), spu_add(c2
, c3
));
369 colors
[3] = spu_convtf(cSum
, 24);
375 * Compute level of detail factor from texcoords.
/*
 * Estimate the level-of-detail factor (lambda) for a 2x2 pixel quad from
 * its texture coordinates.
 * Lanes 0/1 of s and t are differenced for the x derivative and lanes 0/2
 * for the y derivative; the s differences are scaled by the base level's
 * width and the t differences by its height.  The larger of the two squared
 * gradient lengths is converted with logf() * 1.442695 (i.e. log base 2).
 * NOTE(review): 'baseLevel' is declared on a line not visible in this
 * excerpt, and rho as computed here is a squared length -- confirm whether
 * a square root is applied on a line that is not shown.
 */
378 compute_lambda(uint unit
, vector
float s
, vector
float t
)
381 float width
= spu
.texture
[unit
].level
[baseLevel
].width
;
/* Fixed: height previously read the .width field, which gave wrong t
 * derivatives for non-square textures. */
382 float height
= spu
.texture
[unit
].level
[baseLevel
].height
;
383 float dsdx
= width
* (spu_extract(s
, 1) - spu_extract(s
, 0));
384 float dsdy
= width
* (spu_extract(s
, 2) - spu_extract(s
, 0));
385 float dtdx
= height
* (spu_extract(t
, 1) - spu_extract(t
, 0));
386 float dtdy
= height
* (spu_extract(t
, 2) - spu_extract(t
, 0));
/* squared gradient lengths along x and y */
387 float x
= dsdx
* dsdx
+ dtdx
* dtdx
;
388 float y
= dsdy
* dsdy
+ dtdy
* dtdy
;
389 float rho
= x
> y
? x
: y
;
/* 1.442695 = 1 / ln(2): converts the natural log to log base 2 */
391 float lambda
= logf(rho
) * 1.442695f
;
398 * Texture sampling with level of detail selection.
/*
 * Sample four pixels with mipmap level selection.
 * One lambda is computed for the whole quad with compute_lambda(), the
 * sampler's lod_bias is added, and the result is clamped to
 * [min_lod, max_lod].  When lambda <= 0 the unit's mag_sample_texture4
 * callback is invoked on level 0; the remaining visible code rounds lambda
 * to the nearest integer level, clamps it to the texture's max_level and
 * invokes the unit's min_sample_texture4 callback.
 * The level_ignored parameter is not referenced in the visible code.
 * NOTE(review): the else branch separating the two calls is not visible in
 * this excerpt.
 */
401 sample_texture4_lod(vector
float s
, vector
float t
,
402 vector
float r
, vector
float q
,
403 uint unit
, uint level_ignored
, vector
float colors
[4])
406 * Note that we're computing a lambda/lod here that's used for all
407 * four pixels in the quad.
409 float lambda
= compute_lambda(unit
, s
, t
);
/* apply the sampler's LOD bias */
412 lambda
+= spu
.sampler
[unit
].lod_bias
;
/* clamp to the sampler's [min_lod, max_lod] range */
415 if (lambda
< spu
.sampler
[unit
].min_lod
)
416 lambda
= spu
.sampler
[unit
].min_lod
;
417 else if (lambda
> spu
.sampler
[unit
].max_lod
)
418 lambda
= spu
.sampler
[unit
].max_lod
;
/* lambda <= 0: use the magnification filter on level 0 */
420 if (lambda
<= 0.0f
) {
422 spu
.mag_sample_texture4
[unit
](s
, t
, r
, q
, unit
, 0, colors
);
/* round to the nearest mipmap level and clamp to the last level */
426 int level
= (int) (lambda
+ 0.5f
);
427 if (level
> (int) spu
.texture
[unit
].max_level
)
428 level
= spu
.texture
[unit
].max_level
;
429 spu
.min_sample_texture4
[unit
](s
, t
, r
, q
, unit
, level
, colors
);
430 /* XXX to do: mipmap level interpolation */