1 /**************************************************************************
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
33 #include <transpose_matrix4x4.h>
34 #include "pipe/p_format.h"
36 #include "spu_colorpack.h"
37 #include "spu_per_fragment_op.h"
40 #define LINEAR_QUAD_LAYOUT 1
43 static INLINE vector
float
44 spu_min(vector
float a
, vector
float b
)
46 vector
unsigned int m
;
47 m
= spu_cmpgt(a
, b
); /* m = a > b ? ~0 : 0 */
48 return spu_sel(a
, b
, m
);
52 static INLINE vector
float
53 spu_max(vector
float a
, vector
float b
)
55 vector
unsigned int m
;
56 m
= spu_cmpgt(a
, b
); /* m = a > b ? ~0 : 0 */
57 return spu_sel(b
, a
, m
);
/**
 * Called by rasterizer for each quad after the shader has run.  Do
 * all the per-fragment operations including alpha test, z test,
 * stencil test, blend, colormask and logicops.  This is a
 * fallback/debug function.  In reality we'll use a generated function
 * produced by the PPU.  But this function is useful for
 * debugging/testing.
 */
70 spu_fallback_fragment_ops(uint x
, uint y
,
72 tile_t
*depthStencilTile
,
78 vector
unsigned int mask
)
80 vector
float frag_aos
[4];
81 unsigned int fbc0
, fbc1
, fbc2
, fbc3
; /* framebuffer/tile colors */
82 unsigned int fragc0
, fragc1
, fragc2
, fragc3
; /* fragment colors */
87 if (spu
.depth_stencil_alpha
.alpha
.enabled
) {
88 vector
float ref
= spu_splats(spu
.depth_stencil_alpha
.alpha
.ref
);
89 vector
unsigned int amask
;
91 switch (spu
.depth_stencil_alpha
.alpha
.func
) {
93 amask
= spu_cmpgt(ref
, fragA
); /* mask = (fragA < ref) */
95 case PIPE_FUNC_GREATER
:
96 amask
= spu_cmpgt(fragA
, ref
); /* mask = (fragA > ref) */
98 case PIPE_FUNC_GEQUAL
:
99 amask
= spu_cmpgt(ref
, fragA
);
100 amask
= spu_nor(amask
, amask
);
102 case PIPE_FUNC_LEQUAL
:
103 amask
= spu_cmpgt(fragA
, ref
);
104 amask
= spu_nor(amask
, amask
);
106 case PIPE_FUNC_EQUAL
:
107 amask
= spu_cmpeq(ref
, fragA
);
109 case PIPE_FUNC_NOTEQUAL
:
110 amask
= spu_cmpeq(ref
, fragA
);
111 amask
= spu_nor(amask
, amask
);
113 case PIPE_FUNC_ALWAYS
:
114 amask
= spu_splats(0xffffffffU
);
116 case PIPE_FUNC_NEVER
:
117 amask
= spu_splats( 0x0U
);
123 mask
= spu_and(mask
, amask
);
128 * Z and/or stencil testing...
130 if (spu
.depth_stencil_alpha
.depth
.enabled
||
131 spu
.depth_stencil_alpha
.stencil
[0].enabled
) {
133 /* get four Z/Stencil values from tile */
134 vector
unsigned int mask24
= spu_splats((unsigned int)0x00ffffffU
);
135 vector
unsigned int ifbZS
= depthStencilTile
->ui4
[y
/2][x
/2];
136 vector
unsigned int ifbZ
= spu_and(ifbZS
, mask24
);
137 vector
unsigned int ifbS
= spu_andc(ifbZS
, mask24
);
139 if (spu
.depth_stencil_alpha
.stencil
[0].enabled
) {
140 /* do stencil test */
141 ASSERT(spu
.fb
.depth_format
== PIPE_FORMAT_S8Z24_UNORM
);
144 else if (spu
.depth_stencil_alpha
.depth
.enabled
) {
147 ASSERT(spu
.fb
.depth_format
== PIPE_FORMAT_S8Z24_UNORM
||
148 spu
.fb
.depth_format
== PIPE_FORMAT_X8Z24_UNORM
);
150 vector
unsigned int ifragZ
;
151 vector
unsigned int zmask
;
153 /* convert four fragZ from float to uint */
154 fragZ
= spu_mul(fragZ
, spu_splats((float) 0xffffff));
155 ifragZ
= spu_convtu(fragZ
, 0);
157 /* do depth comparison, setting zmask with results */
158 switch (spu
.depth_stencil_alpha
.depth
.func
) {
160 zmask
= spu_cmpgt(ifbZ
, ifragZ
); /* mask = (ifragZ < ifbZ) */
162 case PIPE_FUNC_GREATER
:
163 zmask
= spu_cmpgt(ifragZ
, ifbZ
); /* mask = (ifbZ > ifragZ) */
165 case PIPE_FUNC_GEQUAL
:
166 zmask
= spu_cmpgt(ifbZ
, ifragZ
);
167 zmask
= spu_nor(zmask
, zmask
);
169 case PIPE_FUNC_LEQUAL
:
170 zmask
= spu_cmpgt(ifragZ
, ifbZ
);
171 zmask
= spu_nor(zmask
, zmask
);
173 case PIPE_FUNC_EQUAL
:
174 zmask
= spu_cmpeq(ifbZ
, ifragZ
);
176 case PIPE_FUNC_NOTEQUAL
:
177 zmask
= spu_cmpeq(ifbZ
, ifragZ
);
178 zmask
= spu_nor(zmask
, zmask
);
180 case PIPE_FUNC_ALWAYS
:
181 zmask
= spu_splats(0xffffffffU
);
183 case PIPE_FUNC_NEVER
:
184 zmask
= spu_splats( 0x0U
);
190 mask
= spu_and(mask
, zmask
);
192 /* merge framebuffer Z and fragment Z according to the mask */
193 ifbZ
= spu_or(spu_and(ifragZ
, mask
),
194 spu_andc(ifbZ
, mask
));
197 if (spu_extract(spu_orx(mask
), 0)) {
198 /* put new fragment Z/Stencil values back into Z/Stencil tile */
199 depthStencilTile
->ui4
[y
/2][x
/2] = spu_or(ifbZ
, ifbS
);
201 spu
.cur_ztile_status
= TILE_STATUS_DIRTY
;
207 * If we'll need the current framebuffer/tile colors for blending
208 * or logicop or colormask, fetch them now.
210 if (spu
.blend
.blend_enable
||
211 spu
.blend
.logicop_enable
||
212 spu
.blend
.colormask
!= 0xf) {
214 #if LINEAR_QUAD_LAYOUT /* See comments/diagram below */
215 fbc0
= colorTile
->ui
[y
][x
*2+0];
216 fbc1
= colorTile
->ui
[y
][x
*2+1];
217 fbc2
= colorTile
->ui
[y
][x
*2+2];
218 fbc3
= colorTile
->ui
[y
][x
*2+3];
220 fbc0
= colorTile
->ui
[y
+0][x
+0];
221 fbc1
= colorTile
->ui
[y
+0][x
+1];
222 fbc2
= colorTile
->ui
[y
+1][x
+0];
223 fbc3
= colorTile
->ui
[y
+1][x
+1];
231 if (spu
.blend
.blend_enable
) {
232 /* blending terms, misc regs */
233 vector
float term1r
, term1g
, term1b
, term1a
;
234 vector
float term2r
, term2g
, term2b
, term2a
;
235 vector
float one
, tmp
;
237 vector
float fbRGBA
[4]; /* current framebuffer colors */
239 /* convert framebuffer colors from packed int to vector float */
241 vector
float temp
[4]; /* float colors in AOS form */
242 switch (spu
.fb
.color_format
) {
243 case PIPE_FORMAT_B8G8R8A8_UNORM
:
244 temp
[0] = spu_unpack_B8G8R8A8(fbc0
);
245 temp
[1] = spu_unpack_B8G8R8A8(fbc1
);
246 temp
[2] = spu_unpack_B8G8R8A8(fbc2
);
247 temp
[3] = spu_unpack_B8G8R8A8(fbc3
);
249 case PIPE_FORMAT_A8R8G8B8_UNORM
:
250 temp
[0] = spu_unpack_A8R8G8B8(fbc0
);
251 temp
[1] = spu_unpack_A8R8G8B8(fbc1
);
252 temp
[2] = spu_unpack_A8R8G8B8(fbc2
);
253 temp
[3] = spu_unpack_A8R8G8B8(fbc3
);
258 _transpose_matrix4x4(fbRGBA
, temp
); /* fbRGBA = transpose(temp) */
262 * Compute Src RGB terms (fragment color * factor)
264 switch (spu
.blend
.rgb_src_factor
) {
265 case PIPE_BLENDFACTOR_ONE
:
270 case PIPE_BLENDFACTOR_ZERO
:
273 term1b
= spu_splats(0.0f
);
275 case PIPE_BLENDFACTOR_SRC_COLOR
:
276 term1r
= spu_mul(fragR
, fragR
);
277 term1g
= spu_mul(fragG
, fragG
);
278 term1b
= spu_mul(fragB
, fragB
);
280 case PIPE_BLENDFACTOR_SRC_ALPHA
:
281 term1r
= spu_mul(fragR
, fragA
);
282 term1g
= spu_mul(fragG
, fragA
);
283 term1b
= spu_mul(fragB
, fragA
);
285 case PIPE_BLENDFACTOR_DST_COLOR
:
286 term1r
= spu_mul(fragR
, fbRGBA
[0]);
287 term1g
= spu_mul(fragG
, fbRGBA
[1]);
288 term1b
= spu_mul(fragB
, fbRGBA
[1]);
290 case PIPE_BLENDFACTOR_DST_ALPHA
:
291 term1r
= spu_mul(fragR
, fbRGBA
[3]);
292 term1g
= spu_mul(fragG
, fbRGBA
[3]);
293 term1b
= spu_mul(fragB
, fbRGBA
[3]);
295 case PIPE_BLENDFACTOR_CONST_COLOR
:
296 term1r
= spu_mul(fragR
, spu_splats(spu
.blend_color
.color
[0]));
297 term1g
= spu_mul(fragG
, spu_splats(spu
.blend_color
.color
[1]));
298 term1b
= spu_mul(fragB
, spu_splats(spu
.blend_color
.color
[2]));
300 case PIPE_BLENDFACTOR_CONST_ALPHA
:
301 term1r
= spu_mul(fragR
, spu_splats(spu
.blend_color
.color
[3]));
302 term1g
= spu_mul(fragG
, spu_splats(spu
.blend_color
.color
[3]));
303 term1b
= spu_mul(fragB
, spu_splats(spu
.blend_color
.color
[3]));
311 * Compute Src Alpha term (fragment alpha * factor)
313 switch (spu
.blend
.alpha_src_factor
) {
314 case PIPE_BLENDFACTOR_ONE
:
317 case PIPE_BLENDFACTOR_SRC_COLOR
:
318 term1a
= spu_splats(0.0f
);
320 case PIPE_BLENDFACTOR_SRC_ALPHA
:
321 term1a
= spu_mul(fragA
, fragA
);
323 case PIPE_BLENDFACTOR_DST_COLOR
:
325 case PIPE_BLENDFACTOR_DST_ALPHA
:
326 term1a
= spu_mul(fragA
, fbRGBA
[3]);
328 case PIPE_BLENDFACTOR_CONST_COLOR
:
330 case PIPE_BLENDFACTOR_CONST_ALPHA
:
331 term1a
= spu_mul(fragR
, spu_splats(spu
.blend_color
.color
[3]));
339 * Compute Dest RGB terms (framebuffer color * factor)
341 switch (spu
.blend
.rgb_dst_factor
) {
342 case PIPE_BLENDFACTOR_ONE
:
347 case PIPE_BLENDFACTOR_ZERO
:
350 term2b
= spu_splats(0.0f
);
352 case PIPE_BLENDFACTOR_SRC_COLOR
:
353 term2r
= spu_mul(fbRGBA
[0], fragR
);
354 term2g
= spu_mul(fbRGBA
[1], fragG
);
355 term2b
= spu_mul(fbRGBA
[2], fragB
);
357 case PIPE_BLENDFACTOR_SRC_ALPHA
:
358 term2r
= spu_mul(fbRGBA
[0], fragA
);
359 term2g
= spu_mul(fbRGBA
[1], fragA
);
360 term2b
= spu_mul(fbRGBA
[2], fragA
);
362 case PIPE_BLENDFACTOR_INV_SRC_ALPHA
:
363 one
= spu_splats(1.0f
);
364 tmp
= spu_sub(one
, fragA
);
365 term2r
= spu_mul(fbRGBA
[0], tmp
);
366 term2g
= spu_mul(fbRGBA
[1], tmp
);
367 term2b
= spu_mul(fbRGBA
[2], tmp
);
369 case PIPE_BLENDFACTOR_DST_COLOR
:
370 term2r
= spu_mul(fbRGBA
[0], fbRGBA
[0]);
371 term2g
= spu_mul(fbRGBA
[1], fbRGBA
[1]);
372 term2b
= spu_mul(fbRGBA
[2], fbRGBA
[2]);
374 case PIPE_BLENDFACTOR_DST_ALPHA
:
375 term2r
= spu_mul(fbRGBA
[0], fbRGBA
[3]);
376 term2g
= spu_mul(fbRGBA
[1], fbRGBA
[3]);
377 term2b
= spu_mul(fbRGBA
[2], fbRGBA
[3]);
379 case PIPE_BLENDFACTOR_CONST_COLOR
:
380 term2r
= spu_mul(fbRGBA
[0], spu_splats(spu
.blend_color
.color
[0]));
381 term2g
= spu_mul(fbRGBA
[1], spu_splats(spu
.blend_color
.color
[1]));
382 term2b
= spu_mul(fbRGBA
[2], spu_splats(spu
.blend_color
.color
[2]));
384 case PIPE_BLENDFACTOR_CONST_ALPHA
:
385 term2r
= spu_mul(fbRGBA
[0], spu_splats(spu
.blend_color
.color
[3]));
386 term2g
= spu_mul(fbRGBA
[1], spu_splats(spu
.blend_color
.color
[3]));
387 term2b
= spu_mul(fbRGBA
[2], spu_splats(spu
.blend_color
.color
[3]));
395 * Compute Dest Alpha term (framebuffer alpha * factor)
397 switch (spu
.blend
.alpha_dst_factor
) {
398 case PIPE_BLENDFACTOR_ONE
:
401 case PIPE_BLENDFACTOR_SRC_COLOR
:
402 term2a
= spu_splats(0.0f
);
404 case PIPE_BLENDFACTOR_SRC_ALPHA
:
405 term2a
= spu_mul(fbRGBA
[3], fragA
);
407 case PIPE_BLENDFACTOR_INV_SRC_ALPHA
:
408 one
= spu_splats(1.0f
);
409 tmp
= spu_sub(one
, fragA
);
410 term2a
= spu_mul(fbRGBA
[3], tmp
);
412 case PIPE_BLENDFACTOR_DST_COLOR
:
414 case PIPE_BLENDFACTOR_DST_ALPHA
:
415 term2a
= spu_mul(fbRGBA
[3], fbRGBA
[3]);
417 case PIPE_BLENDFACTOR_CONST_COLOR
:
419 case PIPE_BLENDFACTOR_CONST_ALPHA
:
420 term2a
= spu_mul(fbRGBA
[3], spu_splats(spu
.blend_color
.color
[3]));
428 * Combine Src/Dest RGB terms
430 switch (spu
.blend
.rgb_func
) {
432 fragR
= spu_add(term1r
, term2r
);
433 fragG
= spu_add(term1g
, term2g
);
434 fragB
= spu_add(term1b
, term2b
);
436 case PIPE_BLEND_SUBTRACT
:
437 fragR
= spu_sub(term1r
, term2r
);
438 fragG
= spu_sub(term1g
, term2g
);
439 fragB
= spu_sub(term1b
, term2b
);
441 case PIPE_BLEND_REVERSE_SUBTRACT
:
442 fragR
= spu_sub(term2r
, term1r
);
443 fragG
= spu_sub(term2g
, term1g
);
444 fragB
= spu_sub(term2b
, term1b
);
447 fragR
= spu_min(term1r
, term2r
);
448 fragG
= spu_min(term1g
, term2g
);
449 fragB
= spu_min(term1b
, term2b
);
452 fragR
= spu_max(term1r
, term2r
);
453 fragG
= spu_max(term1g
, term2g
);
454 fragB
= spu_max(term1b
, term2b
);
461 * Combine Src/Dest A term
463 switch (spu
.blend
.alpha_func
) {
465 fragA
= spu_add(term1a
, term2a
);
467 case PIPE_BLEND_SUBTRACT
:
468 fragA
= spu_sub(term1a
, term2a
);
470 case PIPE_BLEND_REVERSE_SUBTRACT
:
471 fragA
= spu_sub(term2a
, term1a
);
474 fragA
= spu_min(term1a
, term2a
);
477 fragA
= spu_max(term1a
, term2a
);
486 * Convert RRRR,GGGG,BBBB,AAAA to RGBA,RGBA,RGBA,RGBA.
491 vector
float frag_soa
[4];
496 _transpose_matrix4x4(frag_aos
, frag_soa
);
499 /* short-cut relying on function parameter layout: */
500 _transpose_matrix4x4(frag_aos
, &fragR
);
506 * Pack fragment float colors into 32-bit RGBA words.
508 switch (spu
.fb
.color_format
) {
509 case PIPE_FORMAT_A8R8G8B8_UNORM
:
510 fragc0
= spu_pack_A8R8G8B8(frag_aos
[0]);
511 fragc1
= spu_pack_A8R8G8B8(frag_aos
[1]);
512 fragc2
= spu_pack_A8R8G8B8(frag_aos
[2]);
513 fragc3
= spu_pack_A8R8G8B8(frag_aos
[3]);
515 case PIPE_FORMAT_B8G8R8A8_UNORM
:
516 fragc0
= spu_pack_B8G8R8A8(frag_aos
[0]);
517 fragc1
= spu_pack_B8G8R8A8(frag_aos
[1]);
518 fragc2
= spu_pack_B8G8R8A8(frag_aos
[2]);
519 fragc3
= spu_pack_B8G8R8A8(frag_aos
[3]);
522 fprintf(stderr
, "SPU: Bad pixel format in spu_default_fragment_ops\n");
530 if (spu
.blend
.colormask
!= 0xf) {
531 uint cmask
= 0x0; /* each byte corresponds to a color channel */
533 /* Form bitmask depending on color buffer format and colormask bits */
534 switch (spu
.fb
.color_format
) {
535 case PIPE_FORMAT_A8R8G8B8_UNORM
:
536 if (spu
.blend
.colormask
& PIPE_MASK_R
)
537 cmask
|= 0x00ff0000; /* red */
538 if (spu
.blend
.colormask
& PIPE_MASK_G
)
539 cmask
|= 0x0000ff00; /* green */
540 if (spu
.blend
.colormask
& PIPE_MASK_B
)
541 cmask
|= 0x000000ff; /* blue */
542 if (spu
.blend
.colormask
& PIPE_MASK_A
)
543 cmask
|= 0xff000000; /* alpha */
545 case PIPE_FORMAT_B8G8R8A8_UNORM
:
546 if (spu
.blend
.colormask
& PIPE_MASK_R
)
547 cmask
|= 0x0000ff00; /* red */
548 if (spu
.blend
.colormask
& PIPE_MASK_G
)
549 cmask
|= 0x00ff0000; /* green */
550 if (spu
.blend
.colormask
& PIPE_MASK_B
)
551 cmask
|= 0xff000000; /* blue */
552 if (spu
.blend
.colormask
& PIPE_MASK_A
)
553 cmask
|= 0x000000ff; /* alpha */
560 * Apply color mask to the 32-bit packed colors.
562 * frag color[i] = frag color[i];
564 * frag color[i] = framebuffer color[i];
566 fragc0
= (fragc0
& cmask
) | (fbc0
& ~cmask
);
567 fragc1
= (fragc1
& cmask
) | (fbc1
& ~cmask
);
568 fragc2
= (fragc2
& cmask
) | (fbc2
& ~cmask
);
569 fragc3
= (fragc3
& cmask
) | (fbc3
& ~cmask
);
576 if (spu
.blend
.logicop_enable
) {
578 /* apply logicop to 32-bit packed colors (fragcx and fbcx) */
583 * If mask is non-zero, mark tile as dirty.
585 if (spu_extract(spu_orx(mask
), 0)) {
586 spu
.cur_ctile_status
= TILE_STATUS_DIRTY
;
589 /* write no fragments */
595 * Write new fragment/quad colors to the framebuffer/tile.
596 * Only write pixels where the corresponding mask word is set.
598 #if LINEAR_QUAD_LAYOUT
605 if (spu_extract(mask
, 0))
606 colorTile
->ui
[y
][x
*2] = fragc0
;
607 if (spu_extract(mask
, 1))
608 colorTile
->ui
[y
][x
*2+1] = fragc1
;
609 if (spu_extract(mask
, 2))
610 colorTile
->ui
[y
][x
*2+2] = fragc2
;
611 if (spu_extract(mask
, 3))
612 colorTile
->ui
[y
][x
*2+3] = fragc3
;
622 if (spu_extract(mask
, 0))
623 colorTile
->ui
[y
+0][x
+0] = fragc0
;
624 if (spu_extract(mask
, 1))
625 colorTile
->ui
[y
+0][x
+1] = fragc1
;
626 if (spu_extract(mask
, 2))
627 colorTile
->ui
[y
+1][x
+0] = fragc2
;
628 if (spu_extract(mask
, 3))
629 colorTile
->ui
[y
+1][x
+1] = fragc3
;