1 /**************************************************************************
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
33 #include <transpose_matrix4x4.h>
34 #include "pipe/p_format.h"
36 #include "spu_colorpack.h"
37 #include "spu_per_fragment_op.h"
40 #define LINEAR_QUAD_LAYOUT 1
44 * Called by rasterizer for each quad after the shader has run. Do
45 * all the per-fragment operations including alpha test, z test,
46 * stencil test, blend, colormask and logicops. This is a
47 * fallback/debug function. In reality we'll use a generated function
48 * produced by the PPU. But this function is useful for
49 * testing/debugging.
50 */
52 spu_fallback_fragment_ops(uint x
, uint y
,
54 tile_t
*depthStencilTile
,
60 vector
unsigned int mask
,
63 vector
float frag_aos
[4];
64 unsigned int fbc0
, fbc1
, fbc2
, fbc3
; /* framebuffer/tile colors */
65 unsigned int fragc0
, fragc1
, fragc2
, fragc3
; /* fragment colors */
70 if (spu
.depth_stencil_alpha
.alpha
.enabled
) {
71 vector
float ref
= spu_splats(spu
.depth_stencil_alpha
.alpha
.ref
);
72 vector
unsigned int amask
;
74 switch (spu
.depth_stencil_alpha
.alpha
.func
) {
76 amask
= spu_cmpgt(ref
, fragA
); /* mask = (fragA < ref) */
78 case PIPE_FUNC_GREATER
:
79 amask
= spu_cmpgt(fragA
, ref
); /* mask = (fragA > ref) */
81 case PIPE_FUNC_GEQUAL
:
82 amask
= spu_cmpgt(ref
, fragA
);
83 amask
= spu_nor(amask
, amask
);
85 case PIPE_FUNC_LEQUAL
:
86 amask
= spu_cmpgt(fragA
, ref
);
87 amask
= spu_nor(amask
, amask
);
90 amask
= spu_cmpeq(ref
, fragA
);
92 case PIPE_FUNC_NOTEQUAL
:
93 amask
= spu_cmpeq(ref
, fragA
);
94 amask
= spu_nor(amask
, amask
);
96 case PIPE_FUNC_ALWAYS
:
97 amask
= spu_splats(0xffffffffU
);
100 amask
= spu_splats( 0x0U
);
106 mask
= spu_and(mask
, amask
);
111 * Z and/or stencil testing...
113 if (spu
.depth_stencil_alpha
.depth
.enabled
||
114 spu
.depth_stencil_alpha
.stencil
[0].enabled
) {
116 /* get four Z/Stencil values from tile */
117 vector
unsigned int mask24
= spu_splats((unsigned int)0x00ffffffU
);
118 vector
unsigned int ifbZS
= depthStencilTile
->ui4
[y
/2][x
/2];
119 vector
unsigned int ifbZ
= spu_and(ifbZS
, mask24
);
120 vector
unsigned int ifbS
= spu_andc(ifbZS
, mask24
);
122 if (spu
.depth_stencil_alpha
.stencil
[0].enabled
) {
123 /* do stencil test */
124 ASSERT(spu
.fb
.depth_format
== PIPE_FORMAT_S8Z24_UNORM
);
127 else if (spu
.depth_stencil_alpha
.depth
.enabled
) {
130 ASSERT(spu
.fb
.depth_format
== PIPE_FORMAT_S8Z24_UNORM
||
131 spu
.fb
.depth_format
== PIPE_FORMAT_X8Z24_UNORM
);
133 vector
unsigned int ifragZ
;
134 vector
unsigned int zmask
;
136 /* convert four fragZ from float to uint */
137 fragZ
= spu_mul(fragZ
, spu_splats((float) 0xffffff));
138 ifragZ
= spu_convtu(fragZ
, 0);
140 /* do depth comparison, setting zmask with results */
141 switch (spu
.depth_stencil_alpha
.depth
.func
) {
143 zmask
= spu_cmpgt(ifbZ
, ifragZ
); /* mask = (ifragZ < ifbZ) */
145 case PIPE_FUNC_GREATER
:
146 zmask
= spu_cmpgt(ifragZ
, ifbZ
); /* mask = (ifbZ > ifragZ) */
148 case PIPE_FUNC_GEQUAL
:
149 zmask
= spu_cmpgt(ifbZ
, ifragZ
);
150 zmask
= spu_nor(zmask
, zmask
);
152 case PIPE_FUNC_LEQUAL
:
153 zmask
= spu_cmpgt(ifragZ
, ifbZ
);
154 zmask
= spu_nor(zmask
, zmask
);
156 case PIPE_FUNC_EQUAL
:
157 zmask
= spu_cmpeq(ifbZ
, ifragZ
);
159 case PIPE_FUNC_NOTEQUAL
:
160 zmask
= spu_cmpeq(ifbZ
, ifragZ
);
161 zmask
= spu_nor(zmask
, zmask
);
163 case PIPE_FUNC_ALWAYS
:
164 zmask
= spu_splats(0xffffffffU
);
166 case PIPE_FUNC_NEVER
:
167 zmask
= spu_splats( 0x0U
);
173 mask
= spu_and(mask
, zmask
);
175 /* merge framebuffer Z and fragment Z according to the mask */
176 ifbZ
= spu_or(spu_and(ifragZ
, mask
),
177 spu_andc(ifbZ
, mask
));
180 if (spu_extract(spu_orx(mask
), 0)) {
181 /* put new fragment Z/Stencil values back into Z/Stencil tile */
182 depthStencilTile
->ui4
[y
/2][x
/2] = spu_or(ifbZ
, ifbS
);
184 spu
.cur_ztile_status
= TILE_STATUS_DIRTY
;
190 * If we'll need the current framebuffer/tile colors for blending
191 * or logicop or colormask, fetch them now.
193 if (spu
.blend
.blend_enable
||
194 spu
.blend
.logicop_enable
||
195 spu
.blend
.colormask
!= 0xf) {
197 #if LINEAR_QUAD_LAYOUT /* See comments/diagram below */
198 fbc0
= colorTile
->ui
[y
][x
*2+0];
199 fbc1
= colorTile
->ui
[y
][x
*2+1];
200 fbc2
= colorTile
->ui
[y
][x
*2+2];
201 fbc3
= colorTile
->ui
[y
][x
*2+3];
203 fbc0
= colorTile
->ui
[y
+0][x
+0];
204 fbc1
= colorTile
->ui
[y
+0][x
+1];
205 fbc2
= colorTile
->ui
[y
+1][x
+0];
206 fbc3
= colorTile
->ui
[y
+1][x
+1];
214 if (spu
.blend
.blend_enable
) {
215 /* blending terms, misc regs */
216 vector
float term1r
, term1g
, term1b
, term1a
;
217 vector
float term2r
, term2g
, term2b
, term2a
;
218 vector
float one
, tmp
;
220 vector
float fbRGBA
[4]; /* current framebuffer colors */
222 /* convert framebuffer colors from packed int to vector float */
224 vector
float temp
[4]; /* float colors in AOS form */
225 switch (spu
.fb
.color_format
) {
226 case PIPE_FORMAT_B8G8R8A8_UNORM
:
227 temp
[0] = spu_unpack_B8G8R8A8(fbc0
);
228 temp
[1] = spu_unpack_B8G8R8A8(fbc1
);
229 temp
[2] = spu_unpack_B8G8R8A8(fbc2
);
230 temp
[3] = spu_unpack_B8G8R8A8(fbc3
);
232 case PIPE_FORMAT_A8R8G8B8_UNORM
:
233 temp
[0] = spu_unpack_A8R8G8B8(fbc0
);
234 temp
[1] = spu_unpack_A8R8G8B8(fbc1
);
235 temp
[2] = spu_unpack_A8R8G8B8(fbc2
);
236 temp
[3] = spu_unpack_A8R8G8B8(fbc3
);
241 _transpose_matrix4x4(fbRGBA
, temp
); /* fbRGBA = transpose(temp) */
245 * Compute Src RGB terms
247 switch (spu
.blend
.rgb_src_factor
) {
248 case PIPE_BLENDFACTOR_ONE
:
253 case PIPE_BLENDFACTOR_ZERO
:
256 term1b
= spu_splats(0.0f
);
258 case PIPE_BLENDFACTOR_SRC_COLOR
:
259 term1r
= spu_mul(fragR
, fragR
);
260 term1g
= spu_mul(fragG
, fragG
);
261 term1b
= spu_mul(fragB
, fragB
);
263 case PIPE_BLENDFACTOR_SRC_ALPHA
:
264 term1r
= spu_mul(fragR
, fragA
);
265 term1g
= spu_mul(fragG
, fragA
);
266 term1b
= spu_mul(fragB
, fragA
);
274 * Compute Src Alpha term
276 switch (spu
.blend
.alpha_src_factor
) {
277 case PIPE_BLENDFACTOR_ONE
:
280 case PIPE_BLENDFACTOR_SRC_COLOR
:
281 term1a
= spu_splats(0.0f
);
283 case PIPE_BLENDFACTOR_SRC_ALPHA
:
284 term1a
= spu_mul(fragA
, fragA
);
292 * Compute Dest RGB terms
294 switch (spu
.blend
.rgb_dst_factor
) {
295 case PIPE_BLENDFACTOR_ONE
:
300 case PIPE_BLENDFACTOR_ZERO
:
303 term2b
= spu_splats(0.0f
);
305 case PIPE_BLENDFACTOR_SRC_COLOR
:
306 term2r
= spu_mul(fbRGBA
[0], fragR
);
307 term2g
= spu_mul(fbRGBA
[1], fragG
);
308 term2b
= spu_mul(fbRGBA
[2], fragB
);
310 case PIPE_BLENDFACTOR_SRC_ALPHA
:
311 term2r
= spu_mul(fbRGBA
[0], fragA
);
312 term2g
= spu_mul(fbRGBA
[1], fragA
);
313 term2b
= spu_mul(fbRGBA
[2], fragA
);
315 case PIPE_BLENDFACTOR_INV_SRC_ALPHA
:
316 one
= spu_splats(1.0f
);
317 tmp
= spu_sub(one
, fragA
);
318 term2r
= spu_mul(fbRGBA
[0], tmp
);
319 term2g
= spu_mul(fbRGBA
[1], tmp
);
320 term2b
= spu_mul(fbRGBA
[2], tmp
);
328 * Compute Dest Alpha term
330 switch (spu
.blend
.alpha_dst_factor
) {
331 case PIPE_BLENDFACTOR_ONE
:
334 case PIPE_BLENDFACTOR_SRC_COLOR
:
335 term2a
= spu_splats(0.0f
);
337 case PIPE_BLENDFACTOR_SRC_ALPHA
:
338 term2a
= spu_mul(fbRGBA
[3], fragA
);
340 case PIPE_BLENDFACTOR_INV_SRC_ALPHA
:
341 one
= spu_splats(1.0f
);
342 tmp
= spu_sub(one
, fragA
);
343 term2a
= spu_mul(fbRGBA
[3], tmp
);
351 * Combine Src/Dest RGB terms
353 switch (spu
.blend
.rgb_func
) {
355 fragR
= spu_add(term1r
, term2r
);
356 fragG
= spu_add(term1g
, term2g
);
357 fragB
= spu_add(term1b
, term2b
);
359 case PIPE_BLEND_SUBTRACT
:
360 fragR
= spu_sub(term1r
, term2r
);
361 fragG
= spu_sub(term1g
, term2g
);
362 fragB
= spu_sub(term1b
, term2b
);
370 * Combine Src/Dest A term
372 switch (spu
.blend
.alpha_func
) {
374 fragA
= spu_add(term1a
, term2a
);
376 case PIPE_BLEND_SUBTRACT
:
377 fragA
= spu_sub(term1a
, term2a
);
387 * Convert RRRR,GGGG,BBBB,AAAA to RGBA,RGBA,RGBA,RGBA.
392 vector
float frag_soa
[4];
397 _transpose_matrix4x4(frag_aos
, frag_soa
);
400 /* short-cut relying on function parameter layout: */
401 _transpose_matrix4x4(frag_aos
, &fragR
);
407 * Pack fragment float colors into 32-bit RGBA words.
409 switch (spu
.fb
.color_format
) {
410 case PIPE_FORMAT_A8R8G8B8_UNORM
:
411 fragc0
= spu_pack_A8R8G8B8(frag_aos
[0]);
412 fragc1
= spu_pack_A8R8G8B8(frag_aos
[1]);
413 fragc2
= spu_pack_A8R8G8B8(frag_aos
[2]);
414 fragc3
= spu_pack_A8R8G8B8(frag_aos
[3]);
416 case PIPE_FORMAT_B8G8R8A8_UNORM
:
417 fragc0
= spu_pack_B8G8R8A8(frag_aos
[0]);
418 fragc1
= spu_pack_B8G8R8A8(frag_aos
[1]);
419 fragc2
= spu_pack_B8G8R8A8(frag_aos
[2]);
420 fragc3
= spu_pack_B8G8R8A8(frag_aos
[3]);
423 fprintf(stderr
, "SPU: Bad pixel format in spu_default_fragment_ops\n");
431 if (spu
.blend
.colormask
!= 0xf) {
432 uint cmask
= 0x0; /* each byte corresponds to a color channel */
434 /* Form bitmask depending on color buffer format and colormask bits */
435 switch (spu
.fb
.color_format
) {
436 case PIPE_FORMAT_A8R8G8B8_UNORM
:
437 if (spu
.blend
.colormask
& PIPE_MASK_R
)
438 cmask
|= 0x00ff0000; /* red */
439 if (spu
.blend
.colormask
& PIPE_MASK_G
)
440 cmask
|= 0x0000ff00; /* green */
441 if (spu
.blend
.colormask
& PIPE_MASK_B
)
442 cmask
|= 0x000000ff; /* blue */
443 if (spu
.blend
.colormask
& PIPE_MASK_A
)
444 cmask
|= 0xff000000; /* alpha */
446 case PIPE_FORMAT_B8G8R8A8_UNORM
:
447 if (spu
.blend
.colormask
& PIPE_MASK_R
)
448 cmask
|= 0x0000ff00; /* red */
449 if (spu
.blend
.colormask
& PIPE_MASK_G
)
450 cmask
|= 0x00ff0000; /* green */
451 if (spu
.blend
.colormask
& PIPE_MASK_B
)
452 cmask
|= 0xff000000; /* blue */
453 if (spu
.blend
.colormask
& PIPE_MASK_A
)
454 cmask
|= 0x000000ff; /* alpha */
461 * Apply color mask to the 32-bit packed colors.
463 * frag color[i] = frag color[i];
465 * frag color[i] = framebuffer color[i];
467 fragc0
= (fragc0
& cmask
) | (fbc0
& ~cmask
);
468 fragc1
= (fragc1
& cmask
) | (fbc1
& ~cmask
);
469 fragc2
= (fragc2
& cmask
) | (fbc2
& ~cmask
);
470 fragc3
= (fragc3
& cmask
) | (fbc3
& ~cmask
);
477 if (spu
.blend
.logicop_enable
) {
479 /* apply logicop to 32-bit packed colors (fragcx and fbcx) */
484 * If mask is non-zero, mark tile as dirty.
486 if (spu_extract(spu_orx(mask
), 0)) {
487 spu
.cur_ctile_status
= TILE_STATUS_DIRTY
;
490 /* write no fragments */
496 * Write new fragment/quad colors to the framebuffer/tile.
497 * Only write pixels where the corresponding mask word is set.
499 #if LINEAR_QUAD_LAYOUT
506 if (spu_extract(mask
, 0))
507 colorTile
->ui
[y
][x
*2] = fragc0
;
508 if (spu_extract(mask
, 1))
509 colorTile
->ui
[y
][x
*2+1] = fragc1
;
510 if (spu_extract(mask
, 2))
511 colorTile
->ui
[y
][x
*2+2] = fragc2
;
512 if (spu_extract(mask
, 3))
513 colorTile
->ui
[y
][x
*2+3] = fragc3
;
523 if (spu_extract(mask
, 0))
524 colorTile
->ui
[y
+0][x
+0] = fragc0
;
525 if (spu_extract(mask
, 1))
526 colorTile
->ui
[y
+0][x
+1] = fragc1
;
527 if (spu_extract(mask
, 2))
528 colorTile
->ui
[y
+1][x
+0] = fragc2
;
529 if (spu_extract(mask
, 3))
530 colorTile
->ui
[y
+1][x
+1] = fragc3
;