Merge commit 'origin/master' into HEAD
[mesa.git] / src / gallium / drivers / cell / spu / spu_per_fragment_op.c
1 /**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * \author Brian Paul
30 */
31
32
33 #include <transpose_matrix4x4.h>
34 #include "pipe/p_format.h"
35 #include "spu_main.h"
36 #include "spu_colorpack.h"
37 #include "spu_per_fragment_op.h"
38
39
40 #define LINEAR_QUAD_LAYOUT 1
41
42
43 /**
44 * Called by rasterizer for each quad after the shader has run. Do
45 * all the per-fragment operations including alpha test, z test,
46 * stencil test, blend, colormask and logicops. This is a
47 * fallback/debug function. In reality we'll use a generated function
48 * produced by the PPU. But this function is useful for
49 * debug/validation.
50 */
51 void
52 spu_fallback_fragment_ops(uint x, uint y,
53 tile_t *colorTile,
54 tile_t *depthStencilTile,
55 vector float fragZ,
56 vector float fragR,
57 vector float fragG,
58 vector float fragB,
59 vector float fragA,
60 vector unsigned int mask)
61 {
62 vector float frag_aos[4];
63 unsigned int fbc0, fbc1, fbc2, fbc3 ; /* framebuffer/tile colors */
64 unsigned int fragc0, fragc1, fragc2, fragc3; /* fragment colors */
65
66 /*
67 * Do alpha test
68 */
69 if (spu.depth_stencil_alpha.alpha.enabled) {
70 vector float ref = spu_splats(spu.depth_stencil_alpha.alpha.ref);
71 vector unsigned int amask;
72
73 switch (spu.depth_stencil_alpha.alpha.func) {
74 case PIPE_FUNC_LESS:
75 amask = spu_cmpgt(ref, fragA); /* mask = (fragA < ref) */
76 break;
77 case PIPE_FUNC_GREATER:
78 amask = spu_cmpgt(fragA, ref); /* mask = (fragA > ref) */
79 break;
80 case PIPE_FUNC_GEQUAL:
81 amask = spu_cmpgt(ref, fragA);
82 amask = spu_nor(amask, amask);
83 break;
84 case PIPE_FUNC_LEQUAL:
85 amask = spu_cmpgt(fragA, ref);
86 amask = spu_nor(amask, amask);
87 break;
88 case PIPE_FUNC_EQUAL:
89 amask = spu_cmpeq(ref, fragA);
90 break;
91 case PIPE_FUNC_NOTEQUAL:
92 amask = spu_cmpeq(ref, fragA);
93 amask = spu_nor(amask, amask);
94 break;
95 case PIPE_FUNC_ALWAYS:
96 amask = spu_splats(0xffffffffU);
97 break;
98 case PIPE_FUNC_NEVER:
99 amask = spu_splats( 0x0U);
100 break;
101 default:
102 ;
103 }
104
105 mask = spu_and(mask, amask);
106 }
107
108
109 /*
110 * Z and/or stencil testing...
111 */
112 if (spu.depth_stencil_alpha.depth.enabled ||
113 spu.depth_stencil_alpha.stencil[0].enabled) {
114
115 /* get four Z/Stencil values from tile */
116 vector unsigned int mask24 = spu_splats((unsigned int)0x00ffffffU);
117 vector unsigned int ifbZS = depthStencilTile->ui4[y/2][x/2];
118 vector unsigned int ifbZ = spu_and(ifbZS, mask24);
119 vector unsigned int ifbS = spu_andc(ifbZS, mask24);
120
121 if (spu.depth_stencil_alpha.stencil[0].enabled) {
122 /* do stencil test */
123 ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM);
124
125 }
126 else if (spu.depth_stencil_alpha.depth.enabled) {
127 /* do depth test */
128
129 ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM ||
130 spu.fb.depth_format == PIPE_FORMAT_X8Z24_UNORM);
131
132 vector unsigned int ifragZ;
133 vector unsigned int zmask;
134
135 /* convert four fragZ from float to uint */
136 fragZ = spu_mul(fragZ, spu_splats((float) 0xffffff));
137 ifragZ = spu_convtu(fragZ, 0);
138
139 /* do depth comparison, setting zmask with results */
140 switch (spu.depth_stencil_alpha.depth.func) {
141 case PIPE_FUNC_LESS:
142 zmask = spu_cmpgt(ifbZ, ifragZ); /* mask = (ifragZ < ifbZ) */
143 break;
144 case PIPE_FUNC_GREATER:
145 zmask = spu_cmpgt(ifragZ, ifbZ); /* mask = (ifbZ > ifragZ) */
146 break;
147 case PIPE_FUNC_GEQUAL:
148 zmask = spu_cmpgt(ifbZ, ifragZ);
149 zmask = spu_nor(zmask, zmask);
150 break;
151 case PIPE_FUNC_LEQUAL:
152 zmask = spu_cmpgt(ifragZ, ifbZ);
153 zmask = spu_nor(zmask, zmask);
154 break;
155 case PIPE_FUNC_EQUAL:
156 zmask = spu_cmpeq(ifbZ, ifragZ);
157 break;
158 case PIPE_FUNC_NOTEQUAL:
159 zmask = spu_cmpeq(ifbZ, ifragZ);
160 zmask = spu_nor(zmask, zmask);
161 break;
162 case PIPE_FUNC_ALWAYS:
163 zmask = spu_splats(0xffffffffU);
164 break;
165 case PIPE_FUNC_NEVER:
166 zmask = spu_splats( 0x0U);
167 break;
168 default:
169 ;
170 }
171
172 mask = spu_and(mask, zmask);
173
174 /* merge framebuffer Z and fragment Z according to the mask */
175 ifbZ = spu_or(spu_and(ifragZ, mask),
176 spu_andc(ifbZ, mask));
177 }
178
179 if (spu_extract(spu_orx(mask), 0)) {
180 /* put new fragment Z/Stencil values back into Z/Stencil tile */
181 depthStencilTile->ui4[y/2][x/2] = spu_or(ifbZ, ifbS);
182
183 spu.cur_ztile_status = TILE_STATUS_DIRTY;
184 }
185 }
186
187
188 /*
189 * If we'll need the current framebuffer/tile colors for blending
190 * or logicop or colormask, fetch them now.
191 */
192 if (spu.blend.blend_enable ||
193 spu.blend.logicop_enable ||
194 spu.blend.colormask != 0xf) {
195
196 #if LINEAR_QUAD_LAYOUT /* See comments/diagram below */
197 fbc0 = colorTile->ui[y][x*2+0];
198 fbc1 = colorTile->ui[y][x*2+1];
199 fbc2 = colorTile->ui[y][x*2+2];
200 fbc3 = colorTile->ui[y][x*2+3];
201 #else
202 fbc0 = colorTile->ui[y+0][x+0];
203 fbc1 = colorTile->ui[y+0][x+1];
204 fbc2 = colorTile->ui[y+1][x+0];
205 fbc3 = colorTile->ui[y+1][x+1];
206 #endif
207 }
208
209
210 /*
211 * Do blending
212 */
213 if (spu.blend.blend_enable) {
214 /* blending terms, misc regs */
215 vector float term1r, term1g, term1b, term1a;
216 vector float term2r, term2g, term2b, term2a;
217 vector float one, tmp;
218
219 vector float fbRGBA[4]; /* current framebuffer colors */
220
221 /* convert framebuffer colors from packed int to vector float */
222 {
223 vector float temp[4]; /* float colors in AOS form */
224 switch (spu.fb.color_format) {
225 case PIPE_FORMAT_B8G8R8A8_UNORM:
226 temp[0] = spu_unpack_B8G8R8A8(fbc0);
227 temp[1] = spu_unpack_B8G8R8A8(fbc1);
228 temp[2] = spu_unpack_B8G8R8A8(fbc2);
229 temp[3] = spu_unpack_B8G8R8A8(fbc3);
230 break;
231 case PIPE_FORMAT_A8R8G8B8_UNORM:
232 temp[0] = spu_unpack_A8R8G8B8(fbc0);
233 temp[1] = spu_unpack_A8R8G8B8(fbc1);
234 temp[2] = spu_unpack_A8R8G8B8(fbc2);
235 temp[3] = spu_unpack_A8R8G8B8(fbc3);
236 break;
237 default:
238 ASSERT(0);
239 }
240 _transpose_matrix4x4(fbRGBA, temp); /* fbRGBA = transpose(temp) */
241 }
242
243 /*
244 * Compute Src RGB terms
245 */
246 switch (spu.blend.rgb_src_factor) {
247 case PIPE_BLENDFACTOR_ONE:
248 term1r = fragR;
249 term1g = fragG;
250 term1b = fragB;
251 break;
252 case PIPE_BLENDFACTOR_ZERO:
253 term1r =
254 term1g =
255 term1b = spu_splats(0.0f);
256 break;
257 case PIPE_BLENDFACTOR_SRC_COLOR:
258 term1r = spu_mul(fragR, fragR);
259 term1g = spu_mul(fragG, fragG);
260 term1b = spu_mul(fragB, fragB);
261 break;
262 case PIPE_BLENDFACTOR_SRC_ALPHA:
263 term1r = spu_mul(fragR, fragA);
264 term1g = spu_mul(fragG, fragA);
265 term1b = spu_mul(fragB, fragA);
266 break;
267 /* XXX more cases */
268 default:
269 ASSERT(0);
270 }
271
272 /*
273 * Compute Src Alpha term
274 */
275 switch (spu.blend.alpha_src_factor) {
276 case PIPE_BLENDFACTOR_ONE:
277 term1a = fragA;
278 break;
279 case PIPE_BLENDFACTOR_SRC_COLOR:
280 term1a = spu_splats(0.0f);
281 break;
282 case PIPE_BLENDFACTOR_SRC_ALPHA:
283 term1a = spu_mul(fragA, fragA);
284 break;
285 /* XXX more cases */
286 default:
287 ASSERT(0);
288 }
289
290 /*
291 * Compute Dest RGB terms
292 */
293 switch (spu.blend.rgb_dst_factor) {
294 case PIPE_BLENDFACTOR_ONE:
295 term2r = fragR;
296 term2g = fragG;
297 term2b = fragB;
298 break;
299 case PIPE_BLENDFACTOR_ZERO:
300 term2r =
301 term2g =
302 term2b = spu_splats(0.0f);
303 break;
304 case PIPE_BLENDFACTOR_SRC_COLOR:
305 term2r = spu_mul(fbRGBA[0], fragR);
306 term2g = spu_mul(fbRGBA[1], fragG);
307 term2b = spu_mul(fbRGBA[2], fragB);
308 break;
309 case PIPE_BLENDFACTOR_SRC_ALPHA:
310 term2r = spu_mul(fbRGBA[0], fragA);
311 term2g = spu_mul(fbRGBA[1], fragA);
312 term2b = spu_mul(fbRGBA[2], fragA);
313 break;
314 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
315 one = spu_splats(1.0f);
316 tmp = spu_sub(one, fragA);
317 term2r = spu_mul(fbRGBA[0], tmp);
318 term2g = spu_mul(fbRGBA[1], tmp);
319 term2b = spu_mul(fbRGBA[2], tmp);
320 break;
321 /* XXX more cases */
322 default:
323 ASSERT(0);
324 }
325
326 /*
327 * Compute Dest Alpha term
328 */
329 switch (spu.blend.alpha_dst_factor) {
330 case PIPE_BLENDFACTOR_ONE:
331 term2a = fragA;
332 break;
333 case PIPE_BLENDFACTOR_SRC_COLOR:
334 term2a = spu_splats(0.0f);
335 break;
336 case PIPE_BLENDFACTOR_SRC_ALPHA:
337 term2a = spu_mul(fbRGBA[3], fragA);
338 break;
339 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
340 one = spu_splats(1.0f);
341 tmp = spu_sub(one, fragA);
342 term2a = spu_mul(fbRGBA[3], tmp);
343 break;
344 /* XXX more cases */
345 default:
346 ASSERT(0);
347 }
348
349 /*
350 * Combine Src/Dest RGB terms
351 */
352 switch (spu.blend.rgb_func) {
353 case PIPE_BLEND_ADD:
354 fragR = spu_add(term1r, term2r);
355 fragG = spu_add(term1g, term2g);
356 fragB = spu_add(term1b, term2b);
357 break;
358 case PIPE_BLEND_SUBTRACT:
359 fragR = spu_sub(term1r, term2r);
360 fragG = spu_sub(term1g, term2g);
361 fragB = spu_sub(term1b, term2b);
362 break;
363 /* XXX more cases */
364 default:
365 ASSERT(0);
366 }
367
368 /*
369 * Combine Src/Dest A term
370 */
371 switch (spu.blend.alpha_func) {
372 case PIPE_BLEND_ADD:
373 fragA = spu_add(term1a, term2a);
374 break;
375 case PIPE_BLEND_SUBTRACT:
376 fragA = spu_sub(term1a, term2a);
377 break;
378 /* XXX more cases */
379 default:
380 ASSERT(0);
381 }
382 }
383
384
385 /*
386 * Convert RRRR,GGGG,BBBB,AAAA to RGBA,RGBA,RGBA,RGBA.
387 */
388 #if 0
389 /* original code */
390 {
391 vector float frag_soa[4];
392 frag_soa[0] = fragR;
393 frag_soa[1] = fragG;
394 frag_soa[2] = fragB;
395 frag_soa[3] = fragA;
396 _transpose_matrix4x4(frag_aos, frag_soa);
397 }
398 #else
399 /* short-cut relying on function parameter layout: */
400 _transpose_matrix4x4(frag_aos, &fragR);
401 (void) fragG;
402 (void) fragB;
403 #endif
404
405 /*
406 * Pack fragment float colors into 32-bit RGBA words.
407 */
408 switch (spu.fb.color_format) {
409 case PIPE_FORMAT_A8R8G8B8_UNORM:
410 fragc0 = spu_pack_A8R8G8B8(frag_aos[0]);
411 fragc1 = spu_pack_A8R8G8B8(frag_aos[1]);
412 fragc2 = spu_pack_A8R8G8B8(frag_aos[2]);
413 fragc3 = spu_pack_A8R8G8B8(frag_aos[3]);
414 break;
415 case PIPE_FORMAT_B8G8R8A8_UNORM:
416 fragc0 = spu_pack_B8G8R8A8(frag_aos[0]);
417 fragc1 = spu_pack_B8G8R8A8(frag_aos[1]);
418 fragc2 = spu_pack_B8G8R8A8(frag_aos[2]);
419 fragc3 = spu_pack_B8G8R8A8(frag_aos[3]);
420 break;
421 default:
422 fprintf(stderr, "SPU: Bad pixel format in spu_default_fragment_ops\n");
423 ASSERT(0);
424 }
425
426
427 /*
428 * Do color masking
429 */
430 if (spu.blend.colormask != 0xf) {
431 uint cmask = 0x0; /* each byte corresponds to a color channel */
432
433 /* Form bitmask depending on color buffer format and colormask bits */
434 switch (spu.fb.color_format) {
435 case PIPE_FORMAT_A8R8G8B8_UNORM:
436 if (spu.blend.colormask & (1<<0))
437 cmask |= 0x00ff0000; /* red */
438 if (spu.blend.colormask & (1<<1))
439 cmask |= 0x0000ff00; /* green */
440 if (spu.blend.colormask & (1<<2))
441 cmask |= 0x000000ff; /* blue */
442 if (spu.blend.colormask & (1<<3))
443 cmask |= 0xff000000; /* alpha */
444 break;
445 case PIPE_FORMAT_B8G8R8A8_UNORM:
446 if (spu.blend.colormask & (1<<0))
447 cmask |= 0x0000ff00; /* red */
448 if (spu.blend.colormask & (1<<1))
449 cmask |= 0x00ff0000; /* green */
450 if (spu.blend.colormask & (1<<2))
451 cmask |= 0xff000000; /* blue */
452 if (spu.blend.colormask & (1<<3))
453 cmask |= 0x000000ff; /* alpha */
454 break;
455 default:
456 ASSERT(0);
457 }
458
459 /*
460 * Apply color mask to the 32-bit packed colors.
461 * if (cmask[i])
462 * frag color[i] = frag color[i];
463 * else
464 * frag color[i] = framebuffer color[i];
465 */
466 fragc0 = (fragc0 & cmask) | (fbc0 & ~cmask);
467 fragc1 = (fragc1 & cmask) | (fbc1 & ~cmask);
468 fragc2 = (fragc2 & cmask) | (fbc2 & ~cmask);
469 fragc3 = (fragc3 & cmask) | (fbc3 & ~cmask);
470 }
471
472
473 /*
474 * Do logic ops
475 */
476 if (spu.blend.logicop_enable) {
477 /* XXX to do */
478 /* apply logicop to 32-bit packed colors (fragcx and fbcx) */
479 }
480
481
482 /*
483 * If mask is non-zero, mark tile as dirty.
484 */
485 if (spu_extract(spu_orx(mask), 0)) {
486 spu.cur_ctile_status = TILE_STATUS_DIRTY;
487 }
488 else {
489 /* write no fragments */
490 return;
491 }
492
493
494 /*
495 * Write new fragment/quad colors to the framebuffer/tile.
496 * Only write pixels where the corresponding mask word is set.
497 */
498 #if LINEAR_QUAD_LAYOUT
499 /*
500 * Quad layout:
501 * +--+--+--+--+
502 * |p0|p1|p2|p3|...
503 * +--+--+--+--+
504 */
505 if (spu_extract(mask, 0))
506 colorTile->ui[y][x*2] = fragc0;
507 if (spu_extract(mask, 1))
508 colorTile->ui[y][x*2+1] = fragc1;
509 if (spu_extract(mask, 2))
510 colorTile->ui[y][x*2+2] = fragc2;
511 if (spu_extract(mask, 3))
512 colorTile->ui[y][x*2+3] = fragc3;
513 #else
514 /*
515 * Quad layout:
516 * +--+--+
517 * |p0|p1|...
518 * +--+--+
519 * |p2|p3|...
520 * +--+--+
521 */
522 if (spu_extract(mask, 0))
523 colorTile->ui[y+0][x+0] = fragc0;
524 if (spu_extract(mask, 1))
525 colorTile->ui[y+0][x+1] = fragc1;
526 if (spu_extract(mask, 2))
527 colorTile->ui[y+1][x+0] = fragc2;
528 if (spu_extract(mask, 3))
529 colorTile->ui[y+1][x+1] = fragc3;
530 #endif
531 }