1 /**************************************************************************
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
33 #include <transpose_matrix4x4.h>
34 #include "pipe/p_format.h"
36 #include "spu_colorpack.h"
37 #include "spu_per_fragment_op.h"
40 #define LINEAR_QUAD_LAYOUT 1
44 * Called by rasterizer for each quad after the shader has run. Do
45 * all the per-fragment operations including alpha test, z test,
46 * stencil test, blend, colormask and logicops. This is a
47 * fallback/debug function. In reality we'll use a generated function
48 * produced by the PPU. But this function is useful for debugging/testing.
52 spu_fallback_fragment_ops(uint x
, uint y
,
54 tile_t
*depthStencilTile
,
60 vector
unsigned int mask
)
62 vector
float frag_aos
[4];
63 unsigned int fbc0
, fbc1
, fbc2
, fbc3
; /* framebuffer/tile colors */
64 unsigned int fragc0
, fragc1
, fragc2
, fragc3
; /* fragment colors */
69 if (spu
.depth_stencil_alpha
.alpha
.enabled
) {
70 vector
float ref
= spu_splats(spu
.depth_stencil_alpha
.alpha
.ref
);
71 vector
unsigned int amask
;
73 switch (spu
.depth_stencil_alpha
.alpha
.func
) {
75 amask
= spu_cmpgt(ref
, fragA
); /* mask = (fragA < ref) */
77 case PIPE_FUNC_GREATER
:
78 amask
= spu_cmpgt(fragA
, ref
); /* mask = (fragA > ref) */
80 case PIPE_FUNC_GEQUAL
:
81 amask
= spu_cmpgt(ref
, fragA
);
82 amask
= spu_nor(amask
, amask
);
84 case PIPE_FUNC_LEQUAL
:
85 amask
= spu_cmpgt(fragA
, ref
);
86 amask
= spu_nor(amask
, amask
);
89 amask
= spu_cmpeq(ref
, fragA
);
91 case PIPE_FUNC_NOTEQUAL
:
92 amask
= spu_cmpeq(ref
, fragA
);
93 amask
= spu_nor(amask
, amask
);
95 case PIPE_FUNC_ALWAYS
:
96 amask
= spu_splats(0xffffffffU
);
99 amask
= spu_splats( 0x0U
);
105 mask
= spu_and(mask
, amask
);
110 * Z and/or stencil testing...
112 if (spu
.depth_stencil_alpha
.depth
.enabled
||
113 spu
.depth_stencil_alpha
.stencil
[0].enabled
) {
115 /* get four Z/Stencil values from tile */
116 vector
unsigned int mask24
= spu_splats((unsigned int)0x00ffffffU
);
117 vector
unsigned int ifbZS
= depthStencilTile
->ui4
[y
/2][x
/2];
118 vector
unsigned int ifbZ
= spu_and(ifbZS
, mask24
);
119 vector
unsigned int ifbS
= spu_andc(ifbZS
, mask24
);
121 if (spu
.depth_stencil_alpha
.stencil
[0].enabled
) {
122 /* do stencil test */
123 ASSERT(spu
.fb
.depth_format
== PIPE_FORMAT_S8Z24_UNORM
);
126 else if (spu
.depth_stencil_alpha
.depth
.enabled
) {
129 ASSERT(spu
.fb
.depth_format
== PIPE_FORMAT_S8Z24_UNORM
||
130 spu
.fb
.depth_format
== PIPE_FORMAT_X8Z24_UNORM
);
132 vector
unsigned int ifragZ
;
133 vector
unsigned int zmask
;
135 /* convert four fragZ from float to uint */
136 fragZ
= spu_mul(fragZ
, spu_splats((float) 0xffffff));
137 ifragZ
= spu_convtu(fragZ
, 0);
139 /* do depth comparison, setting zmask with results */
140 switch (spu
.depth_stencil_alpha
.depth
.func
) {
142 zmask
= spu_cmpgt(ifbZ
, ifragZ
); /* mask = (ifragZ < ifbZ) */
144 case PIPE_FUNC_GREATER
:
145 zmask
= spu_cmpgt(ifragZ
, ifbZ
); /* mask = (ifbZ > ifragZ) */
147 case PIPE_FUNC_GEQUAL
:
148 zmask
= spu_cmpgt(ifbZ
, ifragZ
);
149 zmask
= spu_nor(zmask
, zmask
);
151 case PIPE_FUNC_LEQUAL
:
152 zmask
= spu_cmpgt(ifragZ
, ifbZ
);
153 zmask
= spu_nor(zmask
, zmask
);
155 case PIPE_FUNC_EQUAL
:
156 zmask
= spu_cmpeq(ifbZ
, ifragZ
);
158 case PIPE_FUNC_NOTEQUAL
:
159 zmask
= spu_cmpeq(ifbZ
, ifragZ
);
160 zmask
= spu_nor(zmask
, zmask
);
162 case PIPE_FUNC_ALWAYS
:
163 zmask
= spu_splats(0xffffffffU
);
165 case PIPE_FUNC_NEVER
:
166 zmask
= spu_splats( 0x0U
);
172 mask
= spu_and(mask
, zmask
);
174 /* merge framebuffer Z and fragment Z according to the mask */
175 ifbZ
= spu_or(spu_and(ifragZ
, mask
),
176 spu_andc(ifbZ
, mask
));
179 if (spu_extract(spu_orx(mask
), 0)) {
180 /* put new fragment Z/Stencil values back into Z/Stencil tile */
181 depthStencilTile
->ui4
[y
/2][x
/2] = spu_or(ifbZ
, ifbS
);
183 spu
.cur_ztile_status
= TILE_STATUS_DIRTY
;
189 * If we'll need the current framebuffer/tile colors for blending
190 * or logicop or colormask, fetch them now.
192 if (spu
.blend
.blend_enable
||
193 spu
.blend
.logicop_enable
||
194 spu
.blend
.colormask
!= 0xf) {
196 #if LINEAR_QUAD_LAYOUT /* See comments/diagram below */
197 fbc0
= colorTile
->ui
[y
][x
*2+0];
198 fbc1
= colorTile
->ui
[y
][x
*2+1];
199 fbc2
= colorTile
->ui
[y
][x
*2+2];
200 fbc3
= colorTile
->ui
[y
][x
*2+3];
202 fbc0
= colorTile
->ui
[y
+0][x
+0];
203 fbc1
= colorTile
->ui
[y
+0][x
+1];
204 fbc2
= colorTile
->ui
[y
+1][x
+0];
205 fbc3
= colorTile
->ui
[y
+1][x
+1];
213 if (spu
.blend
.blend_enable
) {
214 /* blending terms, misc regs */
215 vector
float term1r
, term1g
, term1b
, term1a
;
216 vector
float term2r
, term2g
, term2b
, term2a
;
217 vector
float one
, tmp
;
219 vector
float fbRGBA
[4]; /* current framebuffer colors */
221 /* convert framebuffer colors from packed int to vector float */
223 vector
float temp
[4]; /* float colors in AOS form */
224 switch (spu
.fb
.color_format
) {
225 case PIPE_FORMAT_B8G8R8A8_UNORM
:
226 temp
[0] = spu_unpack_B8G8R8A8(fbc0
);
227 temp
[1] = spu_unpack_B8G8R8A8(fbc1
);
228 temp
[2] = spu_unpack_B8G8R8A8(fbc2
);
229 temp
[3] = spu_unpack_B8G8R8A8(fbc3
);
231 case PIPE_FORMAT_A8R8G8B8_UNORM
:
232 temp
[0] = spu_unpack_A8R8G8B8(fbc0
);
233 temp
[1] = spu_unpack_A8R8G8B8(fbc1
);
234 temp
[2] = spu_unpack_A8R8G8B8(fbc2
);
235 temp
[3] = spu_unpack_A8R8G8B8(fbc3
);
240 _transpose_matrix4x4(fbRGBA
, temp
); /* fbRGBA = transpose(temp) */
244 * Compute Src RGB terms
246 switch (spu
.blend
.rgb_src_factor
) {
247 case PIPE_BLENDFACTOR_ONE
:
252 case PIPE_BLENDFACTOR_ZERO
:
255 term1b
= spu_splats(0.0f
);
257 case PIPE_BLENDFACTOR_SRC_COLOR
:
258 term1r
= spu_mul(fragR
, fragR
);
259 term1g
= spu_mul(fragG
, fragG
);
260 term1b
= spu_mul(fragB
, fragB
);
262 case PIPE_BLENDFACTOR_SRC_ALPHA
:
263 term1r
= spu_mul(fragR
, fragA
);
264 term1g
= spu_mul(fragG
, fragA
);
265 term1b
= spu_mul(fragB
, fragA
);
273 * Compute Src Alpha term
275 switch (spu
.blend
.alpha_src_factor
) {
276 case PIPE_BLENDFACTOR_ONE
:
279 case PIPE_BLENDFACTOR_SRC_COLOR
:
280 term1a
= spu_splats(0.0f
);
282 case PIPE_BLENDFACTOR_SRC_ALPHA
:
283 term1a
= spu_mul(fragA
, fragA
);
291 * Compute Dest RGB terms
293 switch (spu
.blend
.rgb_dst_factor
) {
294 case PIPE_BLENDFACTOR_ONE
:
299 case PIPE_BLENDFACTOR_ZERO
:
302 term2b
= spu_splats(0.0f
);
304 case PIPE_BLENDFACTOR_SRC_COLOR
:
305 term2r
= spu_mul(fbRGBA
[0], fragR
);
306 term2g
= spu_mul(fbRGBA
[1], fragG
);
307 term2b
= spu_mul(fbRGBA
[2], fragB
);
309 case PIPE_BLENDFACTOR_SRC_ALPHA
:
310 term2r
= spu_mul(fbRGBA
[0], fragA
);
311 term2g
= spu_mul(fbRGBA
[1], fragA
);
312 term2b
= spu_mul(fbRGBA
[2], fragA
);
314 case PIPE_BLENDFACTOR_INV_SRC_ALPHA
:
315 one
= spu_splats(1.0f
);
316 tmp
= spu_sub(one
, fragA
);
317 term2r
= spu_mul(fbRGBA
[0], tmp
);
318 term2g
= spu_mul(fbRGBA
[1], tmp
);
319 term2b
= spu_mul(fbRGBA
[2], tmp
);
327 * Compute Dest Alpha term
329 switch (spu
.blend
.alpha_dst_factor
) {
330 case PIPE_BLENDFACTOR_ONE
:
333 case PIPE_BLENDFACTOR_SRC_COLOR
:
334 term2a
= spu_splats(0.0f
);
336 case PIPE_BLENDFACTOR_SRC_ALPHA
:
337 term2a
= spu_mul(fbRGBA
[3], fragA
);
339 case PIPE_BLENDFACTOR_INV_SRC_ALPHA
:
340 one
= spu_splats(1.0f
);
341 tmp
= spu_sub(one
, fragA
);
342 term2a
= spu_mul(fbRGBA
[3], tmp
);
350 * Combine Src/Dest RGB terms
352 switch (spu
.blend
.rgb_func
) {
354 fragR
= spu_add(term1r
, term2r
);
355 fragG
= spu_add(term1g
, term2g
);
356 fragB
= spu_add(term1b
, term2b
);
358 case PIPE_BLEND_SUBTRACT
:
359 fragR
= spu_sub(term1r
, term2r
);
360 fragG
= spu_sub(term1g
, term2g
);
361 fragB
= spu_sub(term1b
, term2b
);
369 * Combine Src/Dest A term
371 switch (spu
.blend
.alpha_func
) {
373 fragA
= spu_add(term1a
, term2a
);
375 case PIPE_BLEND_SUBTRACT
:
376 fragA
= spu_sub(term1a
, term2a
);
386 * Convert RRRR,GGGG,BBBB,AAAA to RGBA,RGBA,RGBA,RGBA.
391 vector
float frag_soa
[4];
396 _transpose_matrix4x4(frag_aos
, frag_soa
);
399 /* short-cut relying on function parameter layout: */
400 _transpose_matrix4x4(frag_aos
, &fragR
);
406 * Pack fragment float colors into 32-bit RGBA words.
408 switch (spu
.fb
.color_format
) {
409 case PIPE_FORMAT_A8R8G8B8_UNORM
:
410 fragc0
= spu_pack_A8R8G8B8(frag_aos
[0]);
411 fragc1
= spu_pack_A8R8G8B8(frag_aos
[1]);
412 fragc2
= spu_pack_A8R8G8B8(frag_aos
[2]);
413 fragc3
= spu_pack_A8R8G8B8(frag_aos
[3]);
415 case PIPE_FORMAT_B8G8R8A8_UNORM
:
416 fragc0
= spu_pack_B8G8R8A8(frag_aos
[0]);
417 fragc1
= spu_pack_B8G8R8A8(frag_aos
[1]);
418 fragc2
= spu_pack_B8G8R8A8(frag_aos
[2]);
419 fragc3
= spu_pack_B8G8R8A8(frag_aos
[3]);
422 fprintf(stderr
, "SPU: Bad pixel format in spu_default_fragment_ops\n");
430 if (spu
.blend
.colormask
!= 0xf) {
431 uint cmask
= 0x0; /* each byte corresponds to a color channel */
433 /* Form bitmask depending on color buffer format and colormask bits */
434 switch (spu
.fb
.color_format
) {
435 case PIPE_FORMAT_A8R8G8B8_UNORM
:
436 if (spu
.blend
.colormask
& (1<<0))
437 cmask
|= 0x00ff0000; /* red */
438 if (spu
.blend
.colormask
& (1<<1))
439 cmask
|= 0x0000ff00; /* green */
440 if (spu
.blend
.colormask
& (1<<2))
441 cmask
|= 0x000000ff; /* blue */
442 if (spu
.blend
.colormask
& (1<<3))
443 cmask
|= 0xff000000; /* alpha */
445 case PIPE_FORMAT_B8G8R8A8_UNORM
:
446 if (spu
.blend
.colormask
& (1<<0))
447 cmask
|= 0x0000ff00; /* red */
448 if (spu
.blend
.colormask
& (1<<1))
449 cmask
|= 0x00ff0000; /* green */
450 if (spu
.blend
.colormask
& (1<<2))
451 cmask
|= 0xff000000; /* blue */
452 if (spu
.blend
.colormask
& (1<<3))
453 cmask
|= 0x000000ff; /* alpha */
460 * Apply color mask to the 32-bit packed colors.
462 * frag color[i] = frag color[i];
464 * frag color[i] = framebuffer color[i];
466 fragc0
= (fragc0
& cmask
) | (fbc0
& ~cmask
);
467 fragc1
= (fragc1
& cmask
) | (fbc1
& ~cmask
);
468 fragc2
= (fragc2
& cmask
) | (fbc2
& ~cmask
);
469 fragc3
= (fragc3
& cmask
) | (fbc3
& ~cmask
);
476 if (spu
.blend
.logicop_enable
) {
478 /* apply logicop to 32-bit packed colors (fragcx and fbcx) */
483 * If mask is non-zero, mark tile as dirty.
485 if (spu_extract(spu_orx(mask
), 0)) {
486 spu
.cur_ctile_status
= TILE_STATUS_DIRTY
;
489 /* write no fragments */
495 * Write new fragment/quad colors to the framebuffer/tile.
496 * Only write pixels where the corresponding mask word is set.
498 #if LINEAR_QUAD_LAYOUT
505 if (spu_extract(mask
, 0))
506 colorTile
->ui
[y
][x
*2] = fragc0
;
507 if (spu_extract(mask
, 1))
508 colorTile
->ui
[y
][x
*2+1] = fragc1
;
509 if (spu_extract(mask
, 2))
510 colorTile
->ui
[y
][x
*2+2] = fragc2
;
511 if (spu_extract(mask
, 3))
512 colorTile
->ui
[y
][x
*2+3] = fragc3
;
522 if (spu_extract(mask
, 0))
523 colorTile
->ui
[y
+0][x
+0] = fragc0
;
524 if (spu_extract(mask
, 1))
525 colorTile
->ui
[y
+0][x
+1] = fragc1
;
526 if (spu_extract(mask
, 2))
527 colorTile
->ui
[y
+1][x
+0] = fragc2
;
528 if (spu_extract(mask
, 3))
529 colorTile
->ui
[y
+1][x
+1] = fragc3
;