CELL: changes to generate SPU code for stenciling
[mesa.git] / src / gallium / drivers / cell / spu / spu_per_fragment_op.c
1 /**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * \author Brian Paul
30 */
31
32
33 #include <transpose_matrix4x4.h>
34 #include "pipe/p_format.h"
35 #include "spu_main.h"
36 #include "spu_colorpack.h"
37 #include "spu_per_fragment_op.h"
38
39
40 #define LINEAR_QUAD_LAYOUT 1
41
42
43 /**
44 * Called by rasterizer for each quad after the shader has run. Do
45 * all the per-fragment operations including alpha test, z test,
46 * stencil test, blend, colormask and logicops. This is a
47 * fallback/debug function. In reality we'll use a generated function
48 * produced by the PPU. But this function is useful for
49 * debug/validation.
50 */
51 void
52 spu_fallback_fragment_ops(uint x, uint y,
53 tile_t *colorTile,
54 tile_t *depthStencilTile,
55 vector float fragZ,
56 vector float fragR,
57 vector float fragG,
58 vector float fragB,
59 vector float fragA,
60 vector unsigned int mask,
61 uint facing)
62 {
63 vector float frag_aos[4];
64 unsigned int fbc0, fbc1, fbc2, fbc3 ; /* framebuffer/tile colors */
65 unsigned int fragc0, fragc1, fragc2, fragc3; /* fragment colors */
66
67 /*
68 * Do alpha test
69 */
70 if (spu.depth_stencil_alpha.alpha.enabled) {
71 vector float ref = spu_splats(spu.depth_stencil_alpha.alpha.ref);
72 vector unsigned int amask;
73
74 switch (spu.depth_stencil_alpha.alpha.func) {
75 case PIPE_FUNC_LESS:
76 amask = spu_cmpgt(ref, fragA); /* mask = (fragA < ref) */
77 break;
78 case PIPE_FUNC_GREATER:
79 amask = spu_cmpgt(fragA, ref); /* mask = (fragA > ref) */
80 break;
81 case PIPE_FUNC_GEQUAL:
82 amask = spu_cmpgt(ref, fragA);
83 amask = spu_nor(amask, amask);
84 break;
85 case PIPE_FUNC_LEQUAL:
86 amask = spu_cmpgt(fragA, ref);
87 amask = spu_nor(amask, amask);
88 break;
89 case PIPE_FUNC_EQUAL:
90 amask = spu_cmpeq(ref, fragA);
91 break;
92 case PIPE_FUNC_NOTEQUAL:
93 amask = spu_cmpeq(ref, fragA);
94 amask = spu_nor(amask, amask);
95 break;
96 case PIPE_FUNC_ALWAYS:
97 amask = spu_splats(0xffffffffU);
98 break;
99 case PIPE_FUNC_NEVER:
100 amask = spu_splats( 0x0U);
101 break;
102 default:
103 ;
104 }
105
106 mask = spu_and(mask, amask);
107 }
108
109
110 /*
111 * Z and/or stencil testing...
112 */
113 if (spu.depth_stencil_alpha.depth.enabled ||
114 spu.depth_stencil_alpha.stencil[0].enabled) {
115
116 /* get four Z/Stencil values from tile */
117 vector unsigned int mask24 = spu_splats((unsigned int)0x00ffffffU);
118 vector unsigned int ifbZS = depthStencilTile->ui4[y/2][x/2];
119 vector unsigned int ifbZ = spu_and(ifbZS, mask24);
120 vector unsigned int ifbS = spu_andc(ifbZS, mask24);
121
122 if (spu.depth_stencil_alpha.stencil[0].enabled) {
123 /* do stencil test */
124 ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM);
125
126 }
127 else if (spu.depth_stencil_alpha.depth.enabled) {
128 /* do depth test */
129
130 ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM ||
131 spu.fb.depth_format == PIPE_FORMAT_X8Z24_UNORM);
132
133 vector unsigned int ifragZ;
134 vector unsigned int zmask;
135
136 /* convert four fragZ from float to uint */
137 fragZ = spu_mul(fragZ, spu_splats((float) 0xffffff));
138 ifragZ = spu_convtu(fragZ, 0);
139
140 /* do depth comparison, setting zmask with results */
141 switch (spu.depth_stencil_alpha.depth.func) {
142 case PIPE_FUNC_LESS:
143 zmask = spu_cmpgt(ifbZ, ifragZ); /* mask = (ifragZ < ifbZ) */
144 break;
145 case PIPE_FUNC_GREATER:
146 zmask = spu_cmpgt(ifragZ, ifbZ); /* mask = (ifbZ > ifragZ) */
147 break;
148 case PIPE_FUNC_GEQUAL:
149 zmask = spu_cmpgt(ifbZ, ifragZ);
150 zmask = spu_nor(zmask, zmask);
151 break;
152 case PIPE_FUNC_LEQUAL:
153 zmask = spu_cmpgt(ifragZ, ifbZ);
154 zmask = spu_nor(zmask, zmask);
155 break;
156 case PIPE_FUNC_EQUAL:
157 zmask = spu_cmpeq(ifbZ, ifragZ);
158 break;
159 case PIPE_FUNC_NOTEQUAL:
160 zmask = spu_cmpeq(ifbZ, ifragZ);
161 zmask = spu_nor(zmask, zmask);
162 break;
163 case PIPE_FUNC_ALWAYS:
164 zmask = spu_splats(0xffffffffU);
165 break;
166 case PIPE_FUNC_NEVER:
167 zmask = spu_splats( 0x0U);
168 break;
169 default:
170 ;
171 }
172
173 mask = spu_and(mask, zmask);
174
175 /* merge framebuffer Z and fragment Z according to the mask */
176 ifbZ = spu_or(spu_and(ifragZ, mask),
177 spu_andc(ifbZ, mask));
178 }
179
180 if (spu_extract(spu_orx(mask), 0)) {
181 /* put new fragment Z/Stencil values back into Z/Stencil tile */
182 depthStencilTile->ui4[y/2][x/2] = spu_or(ifbZ, ifbS);
183
184 spu.cur_ztile_status = TILE_STATUS_DIRTY;
185 }
186 }
187
188
189 /*
190 * If we'll need the current framebuffer/tile colors for blending
191 * or logicop or colormask, fetch them now.
192 */
193 if (spu.blend.blend_enable ||
194 spu.blend.logicop_enable ||
195 spu.blend.colormask != 0xf) {
196
197 #if LINEAR_QUAD_LAYOUT /* See comments/diagram below */
198 fbc0 = colorTile->ui[y][x*2+0];
199 fbc1 = colorTile->ui[y][x*2+1];
200 fbc2 = colorTile->ui[y][x*2+2];
201 fbc3 = colorTile->ui[y][x*2+3];
202 #else
203 fbc0 = colorTile->ui[y+0][x+0];
204 fbc1 = colorTile->ui[y+0][x+1];
205 fbc2 = colorTile->ui[y+1][x+0];
206 fbc3 = colorTile->ui[y+1][x+1];
207 #endif
208 }
209
210
211 /*
212 * Do blending
213 */
214 if (spu.blend.blend_enable) {
215 /* blending terms, misc regs */
216 vector float term1r, term1g, term1b, term1a;
217 vector float term2r, term2g, term2b, term2a;
218 vector float one, tmp;
219
220 vector float fbRGBA[4]; /* current framebuffer colors */
221
222 /* convert framebuffer colors from packed int to vector float */
223 {
224 vector float temp[4]; /* float colors in AOS form */
225 switch (spu.fb.color_format) {
226 case PIPE_FORMAT_B8G8R8A8_UNORM:
227 temp[0] = spu_unpack_B8G8R8A8(fbc0);
228 temp[1] = spu_unpack_B8G8R8A8(fbc1);
229 temp[2] = spu_unpack_B8G8R8A8(fbc2);
230 temp[3] = spu_unpack_B8G8R8A8(fbc3);
231 break;
232 case PIPE_FORMAT_A8R8G8B8_UNORM:
233 temp[0] = spu_unpack_A8R8G8B8(fbc0);
234 temp[1] = spu_unpack_A8R8G8B8(fbc1);
235 temp[2] = spu_unpack_A8R8G8B8(fbc2);
236 temp[3] = spu_unpack_A8R8G8B8(fbc3);
237 break;
238 default:
239 ASSERT(0);
240 }
241 _transpose_matrix4x4(fbRGBA, temp); /* fbRGBA = transpose(temp) */
242 }
243
244 /*
245 * Compute Src RGB terms
246 */
247 switch (spu.blend.rgb_src_factor) {
248 case PIPE_BLENDFACTOR_ONE:
249 term1r = fragR;
250 term1g = fragG;
251 term1b = fragB;
252 break;
253 case PIPE_BLENDFACTOR_ZERO:
254 term1r =
255 term1g =
256 term1b = spu_splats(0.0f);
257 break;
258 case PIPE_BLENDFACTOR_SRC_COLOR:
259 term1r = spu_mul(fragR, fragR);
260 term1g = spu_mul(fragG, fragG);
261 term1b = spu_mul(fragB, fragB);
262 break;
263 case PIPE_BLENDFACTOR_SRC_ALPHA:
264 term1r = spu_mul(fragR, fragA);
265 term1g = spu_mul(fragG, fragA);
266 term1b = spu_mul(fragB, fragA);
267 break;
268 /* XXX more cases */
269 default:
270 ASSERT(0);
271 }
272
273 /*
274 * Compute Src Alpha term
275 */
276 switch (spu.blend.alpha_src_factor) {
277 case PIPE_BLENDFACTOR_ONE:
278 term1a = fragA;
279 break;
280 case PIPE_BLENDFACTOR_SRC_COLOR:
281 term1a = spu_splats(0.0f);
282 break;
283 case PIPE_BLENDFACTOR_SRC_ALPHA:
284 term1a = spu_mul(fragA, fragA);
285 break;
286 /* XXX more cases */
287 default:
288 ASSERT(0);
289 }
290
291 /*
292 * Compute Dest RGB terms
293 */
294 switch (spu.blend.rgb_dst_factor) {
295 case PIPE_BLENDFACTOR_ONE:
296 term2r = fragR;
297 term2g = fragG;
298 term2b = fragB;
299 break;
300 case PIPE_BLENDFACTOR_ZERO:
301 term2r =
302 term2g =
303 term2b = spu_splats(0.0f);
304 break;
305 case PIPE_BLENDFACTOR_SRC_COLOR:
306 term2r = spu_mul(fbRGBA[0], fragR);
307 term2g = spu_mul(fbRGBA[1], fragG);
308 term2b = spu_mul(fbRGBA[2], fragB);
309 break;
310 case PIPE_BLENDFACTOR_SRC_ALPHA:
311 term2r = spu_mul(fbRGBA[0], fragA);
312 term2g = spu_mul(fbRGBA[1], fragA);
313 term2b = spu_mul(fbRGBA[2], fragA);
314 break;
315 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
316 one = spu_splats(1.0f);
317 tmp = spu_sub(one, fragA);
318 term2r = spu_mul(fbRGBA[0], tmp);
319 term2g = spu_mul(fbRGBA[1], tmp);
320 term2b = spu_mul(fbRGBA[2], tmp);
321 break;
322 /* XXX more cases */
323 default:
324 ASSERT(0);
325 }
326
327 /*
328 * Compute Dest Alpha term
329 */
330 switch (spu.blend.alpha_dst_factor) {
331 case PIPE_BLENDFACTOR_ONE:
332 term2a = fragA;
333 break;
334 case PIPE_BLENDFACTOR_SRC_COLOR:
335 term2a = spu_splats(0.0f);
336 break;
337 case PIPE_BLENDFACTOR_SRC_ALPHA:
338 term2a = spu_mul(fbRGBA[3], fragA);
339 break;
340 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
341 one = spu_splats(1.0f);
342 tmp = spu_sub(one, fragA);
343 term2a = spu_mul(fbRGBA[3], tmp);
344 break;
345 /* XXX more cases */
346 default:
347 ASSERT(0);
348 }
349
350 /*
351 * Combine Src/Dest RGB terms
352 */
353 switch (spu.blend.rgb_func) {
354 case PIPE_BLEND_ADD:
355 fragR = spu_add(term1r, term2r);
356 fragG = spu_add(term1g, term2g);
357 fragB = spu_add(term1b, term2b);
358 break;
359 case PIPE_BLEND_SUBTRACT:
360 fragR = spu_sub(term1r, term2r);
361 fragG = spu_sub(term1g, term2g);
362 fragB = spu_sub(term1b, term2b);
363 break;
364 /* XXX more cases */
365 default:
366 ASSERT(0);
367 }
368
369 /*
370 * Combine Src/Dest A term
371 */
372 switch (spu.blend.alpha_func) {
373 case PIPE_BLEND_ADD:
374 fragA = spu_add(term1a, term2a);
375 break;
376 case PIPE_BLEND_SUBTRACT:
377 fragA = spu_sub(term1a, term2a);
378 break;
379 /* XXX more cases */
380 default:
381 ASSERT(0);
382 }
383 }
384
385
386 /*
387 * Convert RRRR,GGGG,BBBB,AAAA to RGBA,RGBA,RGBA,RGBA.
388 */
389 #if 0
390 /* original code */
391 {
392 vector float frag_soa[4];
393 frag_soa[0] = fragR;
394 frag_soa[1] = fragG;
395 frag_soa[2] = fragB;
396 frag_soa[3] = fragA;
397 _transpose_matrix4x4(frag_aos, frag_soa);
398 }
399 #else
400 /* short-cut relying on function parameter layout: */
401 _transpose_matrix4x4(frag_aos, &fragR);
402 (void) fragG;
403 (void) fragB;
404 #endif
405
406 /*
407 * Pack fragment float colors into 32-bit RGBA words.
408 */
409 switch (spu.fb.color_format) {
410 case PIPE_FORMAT_A8R8G8B8_UNORM:
411 fragc0 = spu_pack_A8R8G8B8(frag_aos[0]);
412 fragc1 = spu_pack_A8R8G8B8(frag_aos[1]);
413 fragc2 = spu_pack_A8R8G8B8(frag_aos[2]);
414 fragc3 = spu_pack_A8R8G8B8(frag_aos[3]);
415 break;
416 case PIPE_FORMAT_B8G8R8A8_UNORM:
417 fragc0 = spu_pack_B8G8R8A8(frag_aos[0]);
418 fragc1 = spu_pack_B8G8R8A8(frag_aos[1]);
419 fragc2 = spu_pack_B8G8R8A8(frag_aos[2]);
420 fragc3 = spu_pack_B8G8R8A8(frag_aos[3]);
421 break;
422 default:
423 fprintf(stderr, "SPU: Bad pixel format in spu_default_fragment_ops\n");
424 ASSERT(0);
425 }
426
427
428 /*
429 * Do color masking
430 */
431 if (spu.blend.colormask != 0xf) {
432 uint cmask = 0x0; /* each byte corresponds to a color channel */
433
434 /* Form bitmask depending on color buffer format and colormask bits */
435 switch (spu.fb.color_format) {
436 case PIPE_FORMAT_A8R8G8B8_UNORM:
437 if (spu.blend.colormask & PIPE_MASK_R)
438 cmask |= 0x00ff0000; /* red */
439 if (spu.blend.colormask & PIPE_MASK_G)
440 cmask |= 0x0000ff00; /* green */
441 if (spu.blend.colormask & PIPE_MASK_B)
442 cmask |= 0x000000ff; /* blue */
443 if (spu.blend.colormask & PIPE_MASK_A)
444 cmask |= 0xff000000; /* alpha */
445 break;
446 case PIPE_FORMAT_B8G8R8A8_UNORM:
447 if (spu.blend.colormask & PIPE_MASK_R)
448 cmask |= 0x0000ff00; /* red */
449 if (spu.blend.colormask & PIPE_MASK_G)
450 cmask |= 0x00ff0000; /* green */
451 if (spu.blend.colormask & PIPE_MASK_B)
452 cmask |= 0xff000000; /* blue */
453 if (spu.blend.colormask & PIPE_MASK_A)
454 cmask |= 0x000000ff; /* alpha */
455 break;
456 default:
457 ASSERT(0);
458 }
459
460 /*
461 * Apply color mask to the 32-bit packed colors.
462 * if (cmask[i])
463 * frag color[i] = frag color[i];
464 * else
465 * frag color[i] = framebuffer color[i];
466 */
467 fragc0 = (fragc0 & cmask) | (fbc0 & ~cmask);
468 fragc1 = (fragc1 & cmask) | (fbc1 & ~cmask);
469 fragc2 = (fragc2 & cmask) | (fbc2 & ~cmask);
470 fragc3 = (fragc3 & cmask) | (fbc3 & ~cmask);
471 }
472
473
474 /*
475 * Do logic ops
476 */
477 if (spu.blend.logicop_enable) {
478 /* XXX to do */
479 /* apply logicop to 32-bit packed colors (fragcx and fbcx) */
480 }
481
482
483 /*
484 * If mask is non-zero, mark tile as dirty.
485 */
486 if (spu_extract(spu_orx(mask), 0)) {
487 spu.cur_ctile_status = TILE_STATUS_DIRTY;
488 }
489 else {
490 /* write no fragments */
491 return;
492 }
493
494
495 /*
496 * Write new fragment/quad colors to the framebuffer/tile.
497 * Only write pixels where the corresponding mask word is set.
498 */
499 #if LINEAR_QUAD_LAYOUT
500 /*
501 * Quad layout:
502 * +--+--+--+--+
503 * |p0|p1|p2|p3|...
504 * +--+--+--+--+
505 */
506 if (spu_extract(mask, 0))
507 colorTile->ui[y][x*2] = fragc0;
508 if (spu_extract(mask, 1))
509 colorTile->ui[y][x*2+1] = fragc1;
510 if (spu_extract(mask, 2))
511 colorTile->ui[y][x*2+2] = fragc2;
512 if (spu_extract(mask, 3))
513 colorTile->ui[y][x*2+3] = fragc3;
514 #else
515 /*
516 * Quad layout:
517 * +--+--+
518 * |p0|p1|...
519 * +--+--+
520 * |p2|p3|...
521 * +--+--+
522 */
523 if (spu_extract(mask, 0))
524 colorTile->ui[y+0][x+0] = fragc0;
525 if (spu_extract(mask, 1))
526 colorTile->ui[y+0][x+1] = fragc1;
527 if (spu_extract(mask, 2))
528 colorTile->ui[y+1][x+0] = fragc2;
529 if (spu_extract(mask, 3))
530 colorTile->ui[y+1][x+1] = fragc3;
531 #endif
532 }