11c0aa5b23cd181463b2f9d21f9d5e5c2519d682
[mesa.git] / src / gallium / drivers / cell / ppu / cell_state_per_fragment.c
1 /*
2 * (C) Copyright IBM Corporation 2008
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file
27 * Generate code to perform all per-fragment operations.
28 *
29 * Code generated by these functions performs alpha, depth, and stencil
30 * testing as well as alpha blending.
31 *
32 * \note
33 * Occlusion query is not supported, but this is the right place to add that
34 * support.
35 *
36 * \author Ian Romanick <idr@us.ibm.com>
37 */
38
39 #include "pipe/p_defines.h"
40 #include "pipe/p_state.h"
41
42 #include "cell_context.h"
43
44 #include "rtasm/rtasm_ppc_spe.h"
45
46
47 /**
48 * Generate code to perform alpha testing.
49 *
50 * The code generated by this function uses the register specificed by
51 * \c mask as both an input and an output.
52 *
53 * \param dsa Current alpha-test state
54 * \param f Function to which code should be appended
55 * \param mask Index of register containing active fragment mask
56 * \param alphas Index of register containing per-fragment alpha values
57 *
58 * \note Emits a maximum of 6 instructions.
59 */
60 static void
61 emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa,
62 struct spe_function *f, int mask, int alphas)
63 {
64 /* If the alpha function is either NEVER or ALWAYS, there is no need to
65 * load the reference value into a register. ALWAYS is a fairly common
66 * case, and this optimization saves 2 instructions.
67 */
68 if (dsa->alpha.enabled
69 && (dsa->alpha.func != PIPE_FUNC_NEVER)
70 && (dsa->alpha.func != PIPE_FUNC_ALWAYS)) {
71 int ref = spe_allocate_available_register(f);
72 int tmp_a = spe_allocate_available_register(f);
73 int tmp_b = spe_allocate_available_register(f);
74 union {
75 float f;
76 unsigned u;
77 } ref_val;
78 boolean complement = FALSE;
79
80 ref_val.f = dsa->alpha.ref;
81
82 spe_il(f, ref, ref_val.u & 0x0000ffff);
83 spe_ilh(f, ref, ref_val.u >> 16);
84
85 switch (dsa->alpha.func) {
86 case PIPE_FUNC_NOTEQUAL:
87 complement = TRUE;
88 /* FALLTHROUGH */
89
90 case PIPE_FUNC_EQUAL:
91 spe_fceq(f, tmp_a, ref, alphas);
92 break;
93
94 case PIPE_FUNC_LEQUAL:
95 complement = TRUE;
96 /* FALLTHROUGH */
97
98 case PIPE_FUNC_GREATER:
99 spe_fcgt(f, tmp_a, ref, alphas);
100 break;
101
102 case PIPE_FUNC_LESS:
103 complement = TRUE;
104 /* FALLTHROUGH */
105
106 case PIPE_FUNC_GEQUAL:
107 spe_fcgt(f, tmp_a, ref, alphas);
108 spe_fceq(f, tmp_b, ref, alphas);
109 spe_or(f, tmp_a, tmp_b, tmp_a);
110 break;
111
112 case PIPE_FUNC_ALWAYS:
113 case PIPE_FUNC_NEVER:
114 default:
115 assert(0);
116 break;
117 }
118
119 if (complement) {
120 spe_andc(f, mask, mask, tmp_a);
121 } else {
122 spe_and(f, mask, mask, tmp_a);
123 }
124
125 spe_release_register(f, ref);
126 spe_release_register(f, tmp_a);
127 spe_release_register(f, tmp_b);
128 } else if (dsa->alpha.enabled && (dsa->alpha.func == PIPE_FUNC_NEVER)) {
129 spe_il(f, mask, 0);
130 }
131 }
132
133
134 /**
135 * \param dsa Current depth-test state
136 * \param f Function to which code should be appended
137 * \param m Mask of allocated / free SPE registers
138 * \param mask Index of register to contain depth-pass mask
139 * \param stored Index of register containing values from depth buffer
140 * \param calculated Index of register containing per-fragment depth values
141 *
142 * \return
143 * If the calculated depth comparison mask is the actual mask, \c FALSE is
144 * returned. If the calculated depth comparison mask is the compliment of
145 * the actual mask, \c TRUE is returned.
146 *
147 * \note Emits a maximum of 3 instructions.
148 */
149 static boolean
150 emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa,
151 struct spe_function *f, int mask, int stored, int calculated)
152 {
153 unsigned func = (dsa->depth.enabled)
154 ? dsa->depth.func : PIPE_FUNC_ALWAYS;
155 int tmp = spe_allocate_available_register(f);
156 boolean compliment = FALSE;
157
158 switch (func) {
159 case PIPE_FUNC_NEVER:
160 spe_il(f, mask, 0);
161 break;
162
163 case PIPE_FUNC_NOTEQUAL:
164 compliment = TRUE;
165 /* FALLTHROUGH */
166 case PIPE_FUNC_EQUAL:
167 spe_ceq(f, mask, calculated, stored);
168 break;
169
170 case PIPE_FUNC_LEQUAL:
171 compliment = TRUE;
172 /* FALLTHROUGH */
173 case PIPE_FUNC_GREATER:
174 spe_clgt(f, mask, calculated, stored);
175 break;
176
177 case PIPE_FUNC_LESS:
178 compliment = TRUE;
179 /* FALLTHROUGH */
180 case PIPE_FUNC_GEQUAL:
181 spe_clgt(f, mask, calculated, stored);
182 spe_ceq(f, tmp, calculated, stored);
183 spe_or(f, mask, mask, tmp);
184 break;
185
186 case PIPE_FUNC_ALWAYS:
187 spe_il(f, mask, ~0);
188 break;
189
190 default:
191 assert(0);
192 break;
193 }
194
195 spe_release_register(f, tmp);
196 return compliment;
197 }
198
199
200 /**
201 * \note Emits a maximum of 5 instructions.
202 */
203 static void
204 emit_stencil_op(struct spe_function *f,
205 int out, int in, int mask, unsigned op, unsigned ref)
206 {
207 const int clamp = spe_allocate_available_register(f);
208 const int tmp = spe_allocate_available_register(f);
209
210 switch(op) {
211 case PIPE_STENCIL_OP_KEEP:
212 assert(0);
213 case PIPE_STENCIL_OP_ZERO:
214 spe_il(f, out, 0);
215 break;
216 case PIPE_STENCIL_OP_REPLACE:
217 spe_il(f, out, ref);
218 break;
219 case PIPE_STENCIL_OP_INCR:
220 spe_il(f, clamp, 0x0ff);
221 spe_ai(f, out, in, 1);
222 spe_cgti(f, tmp, out, clamp);
223 spe_selb(f, out, out, clamp, tmp);
224 break;
225 case PIPE_STENCIL_OP_DECR:
226 spe_il(f, clamp, 0);
227 spe_ai(f, out, in, -1);
228 spe_cgti(f, tmp, out, clamp);
229 spe_selb(f, out, clamp, out, tmp);
230 break;
231 case PIPE_STENCIL_OP_INCR_WRAP:
232 spe_ai(f, out, in, 1);
233 break;
234 case PIPE_STENCIL_OP_DECR_WRAP:
235 spe_ai(f, out, in, -1);
236 break;
237 case PIPE_STENCIL_OP_INVERT:
238 spe_nor(f, out, in, in);
239 break;
240 default:
241 assert(0);
242 }
243
244 spe_release_register(f, tmp);
245 spe_release_register(f, clamp);
246
247 spe_selb(f, out, in, out, mask);
248 }
249
250
251 /**
252 * \param dsa Depth / stencil test state
253 * \param face 0 for front face, 1 for back face
254 * \param f Function to append instructions to
255 * \param reg_mask Mask of allocated registers
256 * \param mask Register containing mask of fragments passing the
257 * alpha test
258 * \param depth_mask Register containing mask of fragments passing the
259 * depth test
260 * \param depth_compliment Is \c depth_mask the compliment of the actual mask?
261 * \param stencil Register containing values from stencil buffer
262 * \param depth_pass Register to store mask of fragments passing stencil test
263 * and depth test
264 *
265 * \note
266 * Emits a maximum of 10 + (3 * 5) = 25 instructions.
267 */
268 static int
269 emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa,
270 unsigned face,
271 struct spe_function *f,
272 int mask,
273 int depth_mask,
274 boolean depth_complement,
275 int stencil,
276 int depth_pass)
277 {
278 int stencil_fail = spe_allocate_available_register(f);
279 int depth_fail = spe_allocate_available_register(f);
280 int stencil_mask = spe_allocate_available_register(f);
281 int stencil_pass = spe_allocate_available_register(f);
282 int face_stencil = spe_allocate_available_register(f);
283 int stencil_src = stencil;
284 const unsigned ref = (dsa->stencil[face].ref_value
285 & dsa->stencil[face].value_mask);
286 boolean complement = FALSE;
287 int stored = spe_allocate_available_register(f);
288 int tmp = spe_allocate_available_register(f);
289
290
291 if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
292 && (dsa->stencil[face].func != PIPE_FUNC_ALWAYS)
293 && (dsa->stencil[face].value_mask != 0x0ff)) {
294 spe_andi(f, stored, stencil, dsa->stencil[face].value_mask);
295 }
296
297
298 switch (dsa->stencil[face].func) {
299 case PIPE_FUNC_NEVER:
300 spe_il(f, stencil_mask, 0);
301 break;
302
303 case PIPE_FUNC_NOTEQUAL:
304 complement = TRUE;
305 /* FALLTHROUGH */
306 case PIPE_FUNC_EQUAL:
307 spe_ceqi(f, stencil_mask, stored, ref);
308 break;
309
310 case PIPE_FUNC_LEQUAL:
311 complement = TRUE;
312 /* FALLTHROUGH */
313 case PIPE_FUNC_GREATER:
314 spe_clgti(f, stencil_mask, stored, ref);
315 break;
316
317 case PIPE_FUNC_LESS:
318 complement = TRUE;
319 /* FALLTHROUGH */
320 case PIPE_FUNC_GEQUAL:
321 spe_clgti(f, stencil_mask, stored, ref);
322 spe_ceqi(f, tmp, stored, ref);
323 spe_or(f, stencil_mask, stencil_mask, tmp);
324 break;
325
326 case PIPE_FUNC_ALWAYS:
327 /* See comment below. */
328 break;
329
330 default:
331 assert(0);
332 break;
333 }
334
335 spe_release_register(f, stored);
336 spe_release_register(f, tmp);
337
338
339 /* ALWAYS is a very common stencil-test, so some effort is applied to
340 * optimize that case. The stencil-pass mask is the same as the input
341 * fragment mask. This makes the stencil-test (above) a no-op, and the
342 * input fragment mask can be "renamed" the stencil-pass mask.
343 */
344 if (dsa->stencil[face].func == PIPE_FUNC_ALWAYS) {
345 spe_release_register(f, stencil_pass);
346 stencil_pass = mask;
347 } else {
348 if (complement) {
349 spe_andc(f, stencil_pass, mask, stencil_mask);
350 } else {
351 spe_and(f, stencil_pass, mask, stencil_mask);
352 }
353 }
354
355 if (depth_complement) {
356 spe_andc(f, depth_pass, stencil_pass, depth_mask);
357 } else {
358 spe_and(f, depth_pass, stencil_pass, depth_mask);
359 }
360
361
362 /* Conditionally emit code to update the stencil value under various
363 * condititons. Note that there is no need to generate code under the
364 * following circumstances:
365 *
366 * - Stencil write mask is zero.
367 * - For stencil-fail if the stencil test is ALWAYS
368 * - For depth-fail if the stencil test is NEVER
369 * - For depth-pass if the stencil test is NEVER
370 * - Any of the 3 conditions if the operation is KEEP
371 */
372 if (dsa->stencil[face].write_mask != 0) {
373 if ((dsa->stencil[face].func != PIPE_FUNC_ALWAYS)
374 && (dsa->stencil[face].fail_op != PIPE_STENCIL_OP_KEEP)) {
375 if (complement) {
376 spe_and(f, stencil_fail, mask, stencil_mask);
377 } else {
378 spe_andc(f, stencil_fail, mask, stencil_mask);
379 }
380
381 emit_stencil_op(f, face_stencil, stencil_src, stencil_fail,
382 dsa->stencil[face].fail_op,
383 dsa->stencil[face].ref_value);
384
385 stencil_src = face_stencil;
386 }
387
388 if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
389 && (dsa->stencil[face].zfail_op != PIPE_STENCIL_OP_KEEP)) {
390 if (depth_complement) {
391 spe_and(f, depth_fail, stencil_pass, depth_mask);
392 } else {
393 spe_andc(f, depth_fail, stencil_pass, depth_mask);
394 }
395
396 emit_stencil_op(f, face_stencil, stencil_src, depth_fail,
397 dsa->stencil[face].zfail_op,
398 dsa->stencil[face].ref_value);
399 stencil_src = face_stencil;
400 }
401
402 if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
403 && (dsa->stencil[face].zpass_op != PIPE_STENCIL_OP_KEEP)) {
404 emit_stencil_op(f, face_stencil, stencil_src, depth_pass,
405 dsa->stencil[face].zpass_op,
406 dsa->stencil[face].ref_value);
407 stencil_src = face_stencil;
408 }
409 }
410
411 spe_release_register(f, stencil_fail);
412 spe_release_register(f, depth_fail);
413 spe_release_register(f, stencil_mask);
414 if (stencil_pass != mask) {
415 spe_release_register(f, stencil_pass);
416 }
417
418 /* If all of the stencil operations were KEEP or the stencil write mask was
419 * zero, "stencil_src" will still be set to "stencil". In this case
420 * release the "face_stencil" register. Otherwise apply the stencil write
421 * mask to select bits from the calculated stencil value and the previous
422 * stencil value.
423 */
424 if (stencil_src == stencil) {
425 spe_release_register(f, face_stencil);
426 } else if (dsa->stencil[face].write_mask != 0x0ff) {
427 int tmp = spe_allocate_available_register(f);
428
429 spe_il(f, tmp, dsa->stencil[face].write_mask);
430 spe_selb(f, stencil_src, stencil, stencil_src, tmp);
431
432 spe_release_register(f, tmp);
433 }
434
435 return stencil_src;
436 }
437
438
439 void
440 cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa)
441 {
442 struct pipe_depth_stencil_alpha_state *const dsa = &cdsa->base;
443 struct spe_function *const f = &cdsa->code;
444
445 /* This code generates a maximum of 6 (alpha test) + 3 (depth test)
446 * + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions. Round
447 * up to 64 to make it a happy power-of-two.
448 */
449 spe_init_func(f, 4 * 64);
450
451
452 /* Allocate registers for the function's input parameters. Cleverly (and
453 * clever code is usually dangerous, but I couldn't resist) the generated
454 * function returns a structure. Returned structures start with register
455 * 3, and the structure fields are ordered to match up exactly with the
456 * input parameters.
457 */
458 int mask = spe_allocate_register(f, 3);
459 int depth = spe_allocate_register(f, 4);
460 int stencil = spe_allocate_register(f, 5);
461 int zvals = spe_allocate_register(f, 6);
462 int frag_a = spe_allocate_register(f, 7);
463 int facing = spe_allocate_register(f, 8);
464
465 int depth_mask = spe_allocate_available_register(f);
466
467 boolean depth_complement;
468
469
470 emit_alpha_test(dsa, f, mask, frag_a);
471
472 depth_complement = emit_depth_test(dsa, f, depth_mask, depth, zvals);
473
474 if (dsa->stencil[0].enabled) {
475 const int front_depth_pass = spe_allocate_available_register(f);
476 int front_stencil = emit_stencil_test(dsa, 0, f, mask,
477 depth_mask, depth_complement,
478 stencil, front_depth_pass);
479
480 if (dsa->stencil[1].enabled) {
481 const int back_depth_pass = spe_allocate_available_register(f);
482 int back_stencil = emit_stencil_test(dsa, 1, f, mask,
483 depth_mask, depth_complement,
484 stencil, back_depth_pass);
485
486 /* If the front facing stencil value and the back facing stencil
487 * value are stored in the same register, there is no need to select
488 * a value based on the facing. This can happen if the stencil value
489 * was not modified due to the write masks being zero, the stencil
490 * operations being KEEP, etc.
491 */
492 if (front_stencil != back_stencil) {
493 spe_selb(f, stencil, back_stencil, front_stencil, facing);
494 }
495
496 if (back_stencil != stencil) {
497 spe_release_register(f, back_stencil);
498 }
499
500 if (front_stencil != stencil) {
501 spe_release_register(f, front_stencil);
502 }
503
504 spe_selb(f, mask, back_depth_pass, front_depth_pass, facing);
505
506 spe_release_register(f, back_depth_pass);
507 } else {
508 if (front_stencil != stencil) {
509 spe_or(f, stencil, front_stencil, front_stencil);
510 spe_release_register(f, front_stencil);
511 }
512 }
513
514 spe_release_register(f, front_depth_pass);
515 } else if (dsa->depth.enabled) {
516 if (depth_complement) {
517 spe_andc(f, mask, mask, depth_mask);
518 } else {
519 spe_and(f, mask, mask, depth_mask);
520 }
521 }
522
523 if (dsa->depth.writemask) {
524 spe_selb(f, depth, depth, zvals, mask);
525 }
526
527 spe_bi(f, 0, 0, 0);
528
529
530 #if 0
531 {
532 const uint32_t *p = f->store;
533 unsigned i;
534
535 printf("# alpha (%sabled)\n",
536 (dsa->alpha.enabled) ? "en" : "dis");
537 printf("# func: %u\n", dsa->alpha.func);
538 printf("# ref: %.2f\n", dsa->alpha.ref);
539
540 printf("# depth (%sabled)\n",
541 (dsa->depth.enabled) ? "en" : "dis");
542 printf("# func: %u\n", dsa->depth.func);
543
544 for (i = 0; i < 2; i++) {
545 printf("# %s stencil (%sabled)\n",
546 (i == 0) ? "front" : "back",
547 (dsa->stencil[i].enabled) ? "en" : "dis");
548
549 printf("# func: %u\n", dsa->stencil[i].func);
550 printf("# op (sf, zf, zp): %u %u %u\n",
551 dsa->stencil[i].fail_op,
552 dsa->stencil[i].zfail_op,
553 dsa->stencil[i].zpass_op);
554 printf("# ref value / value mask / write mask: %02x %02x %02x\n",
555 dsa->stencil[i].ref_value,
556 dsa->stencil[i].value_mask,
557 dsa->stencil[i].write_mask);
558 }
559
560 printf("\t.text\n");
561 for (/* empty */; p < f->csr; p++) {
562 printf("\t.long\t0x%04x\n", *p);
563 }
564 fflush(stdout);
565 }
566 #endif
567 }
568
569
570 /**
571 * \note Emits a maximum of 3 instructions
572 */
573 static int
574 emit_alpha_factor_calculation(struct spe_function *f,
575 unsigned factor, float const_alpha,
576 int src_alpha, int dst_alpha)
577 {
578 union {
579 float f;
580 unsigned u;
581 } alpha;
582 int factor_reg;
583 int tmp;
584
585
586 alpha.f = const_alpha;
587
588 switch (factor) {
589 case PIPE_BLENDFACTOR_ONE:
590 factor_reg = -1;
591 break;
592
593 case PIPE_BLENDFACTOR_SRC_ALPHA:
594 factor_reg = spe_allocate_available_register(f);
595
596 spe_or(f, factor_reg, src_alpha, src_alpha);
597 break;
598
599 case PIPE_BLENDFACTOR_DST_ALPHA:
600 factor_reg = dst_alpha;
601 break;
602
603 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
604 factor_reg = -1;
605 break;
606
607 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
608 const_alpha = 1.0 - const_alpha;
609 /* FALLTHROUGH */
610 case PIPE_BLENDFACTOR_CONST_ALPHA:
611 factor_reg = spe_allocate_available_register(f);
612
613 spe_il(f, factor_reg, alpha.u & 0x0ffff);
614 spe_ilh(f, factor_reg, alpha.u >> 16);
615 break;
616
617 case PIPE_BLENDFACTOR_ZERO:
618 factor_reg = -1;
619 break;
620
621 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
622 tmp = spe_allocate_available_register(f);
623 factor_reg = spe_allocate_available_register(f);
624
625 spe_il(f, tmp, 1);
626 spe_cuflt(f, tmp, tmp, 0);
627 spe_fs(f, factor_reg, tmp, src_alpha);
628
629 spe_release_register(f, tmp);
630 break;
631
632 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
633 tmp = spe_allocate_available_register(f);
634 factor_reg = spe_allocate_available_register(f);
635
636 spe_il(f, tmp, 1);
637 spe_cuflt(f, tmp, tmp, 0);
638 spe_fs(f, factor_reg, tmp, dst_alpha);
639
640 spe_release_register(f, tmp);
641 break;
642
643 case PIPE_BLENDFACTOR_SRC1_ALPHA:
644 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
645 default:
646 assert(0);
647 factor_reg = -1;
648 break;
649 }
650
651 return factor_reg;
652 }
653
654
655 /**
656 * \note Emits a maximum of 5 instructions
657 */
658 static void
659 emit_color_factor_calculation(struct spe_function *f,
660 unsigned sF, unsigned mask,
661 const struct pipe_blend_color *blend_color,
662 const int *src,
663 const int *dst,
664 int *factor)
665 {
666 union {
667 float f[4];
668 unsigned u[4];
669 } color;
670 int tmp;
671 unsigned i;
672
673
674 color.f[0] = blend_color->color[0];
675 color.f[1] = blend_color->color[1];
676 color.f[2] = blend_color->color[2];
677 color.f[3] = blend_color->color[3];
678
679 factor[0] = -1;
680 factor[1] = -1;
681 factor[2] = -1;
682 factor[3] = -1;
683
684 switch (sF) {
685 case PIPE_BLENDFACTOR_ONE:
686 break;
687
688 case PIPE_BLENDFACTOR_SRC_COLOR:
689 for (i = 0; i < 3; ++i) {
690 if ((mask & (1U << i)) != 0) {
691 factor[i] = spe_allocate_available_register(f);
692 spe_or(f, factor[i], src[i], src[i]);
693 }
694 }
695 break;
696
697 case PIPE_BLENDFACTOR_SRC_ALPHA:
698 factor[0] = spe_allocate_available_register(f);
699 factor[1] = factor[0];
700 factor[2] = factor[0];
701
702 spe_or(f, factor[0], src[3], src[3]);
703 break;
704
705 case PIPE_BLENDFACTOR_DST_ALPHA:
706 factor[0] = dst[3];
707 factor[1] = dst[3];
708 factor[2] = dst[3];
709 break;
710
711 case PIPE_BLENDFACTOR_DST_COLOR:
712 factor[0] = dst[0];
713 factor[1] = dst[1];
714 factor[2] = dst[2];
715 break;
716
717 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
718 tmp = spe_allocate_available_register(f);
719 factor[0] = spe_allocate_available_register(f);
720 factor[1] = factor[0];
721 factor[2] = factor[0];
722
723 /* Alpha saturate means min(As, 1-Ad).
724 */
725 spe_il(f, tmp, 1);
726 spe_cuflt(f, tmp, tmp, 0);
727 spe_fs(f, tmp, tmp, dst[3]);
728 spe_fcgt(f, factor[0], tmp, src[3]);
729 spe_selb(f, factor[0], src[3], tmp, factor[0]);
730
731 spe_release_register(f, tmp);
732 break;
733
734 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
735 color.f[0] = 1.0 - color.f[0];
736 color.f[1] = 1.0 - color.f[1];
737 color.f[2] = 1.0 - color.f[2];
738 /* FALLTHROUGH */
739 case PIPE_BLENDFACTOR_CONST_COLOR:
740 for (i = 0; i < 3; i++) {
741 factor[i] = spe_allocate_available_register(f);
742
743 spe_il(f, factor[i], color.u[i] & 0x0ffff);
744 spe_ilh(f, factor[i], color.u[i] >> 16);
745 }
746 break;
747
748 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
749 color.f[3] = 1.0 - color.f[3];
750 /* FALLTHROUGH */
751 case PIPE_BLENDFACTOR_CONST_ALPHA:
752 factor[0] = spe_allocate_available_register(f);
753 factor[1] = factor[0];
754 factor[2] = factor[0];
755
756 spe_il(f, factor[0], color.u[3] & 0x0ffff);
757 spe_ilh(f, factor[0], color.u[3] >> 16);
758 break;
759
760 case PIPE_BLENDFACTOR_ZERO:
761 break;
762
763 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
764 tmp = spe_allocate_available_register(f);
765
766 spe_il(f, tmp, 1);
767 spe_cuflt(f, tmp, tmp, 0);
768
769 for (i = 0; i < 3; ++i) {
770 if ((mask & (1U << i)) != 0) {
771 factor[i] = spe_allocate_available_register(f);
772 spe_fs(f, factor[i], tmp, src[i]);
773 }
774 }
775
776 spe_release_register(f, tmp);
777 break;
778
779 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
780 tmp = spe_allocate_available_register(f);
781 factor[0] = spe_allocate_available_register(f);
782 factor[1] = factor[0];
783 factor[2] = factor[0];
784
785 spe_il(f, tmp, 1);
786 spe_cuflt(f, tmp, tmp, 0);
787 spe_fs(f, factor[0], tmp, src[3]);
788
789 spe_release_register(f, tmp);
790 break;
791
792 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
793 tmp = spe_allocate_available_register(f);
794 factor[0] = spe_allocate_available_register(f);
795 factor[1] = factor[0];
796 factor[2] = factor[0];
797
798 spe_il(f, tmp, 1);
799 spe_cuflt(f, tmp, tmp, 0);
800 spe_fs(f, factor[0], tmp, dst[3]);
801
802 spe_release_register(f, tmp);
803 break;
804
805 case PIPE_BLENDFACTOR_INV_DST_COLOR:
806 tmp = spe_allocate_available_register(f);
807
808 spe_il(f, tmp, 1);
809 spe_cuflt(f, tmp, tmp, 0);
810
811 for (i = 0; i < 3; ++i) {
812 if ((mask & (1U << i)) != 0) {
813 factor[i] = spe_allocate_available_register(f);
814 spe_fs(f, factor[i], tmp, dst[i]);
815 }
816 }
817
818 spe_release_register(f, tmp);
819 break;
820
821 case PIPE_BLENDFACTOR_SRC1_COLOR:
822 case PIPE_BLENDFACTOR_SRC1_ALPHA:
823 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
824 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
825 default:
826 assert(0);
827 }
828 }
829
830
831 static void
832 emit_blend_calculation(struct spe_function *f,
833 unsigned func, unsigned sF, unsigned dF,
834 int src, int src_factor, int dst, int dst_factor)
835 {
836 int tmp = spe_allocate_available_register(f);
837
838 switch (func) {
839 case PIPE_BLEND_ADD:
840 if (sF == PIPE_BLENDFACTOR_ONE) {
841 if (dF == PIPE_BLENDFACTOR_ZERO) {
842 /* Do nothing. */
843 } else if (dF == PIPE_BLENDFACTOR_ONE) {
844 spe_fa(f, src, src, dst);
845 }
846 } else if (sF == PIPE_BLENDFACTOR_ZERO) {
847 if (dF == PIPE_BLENDFACTOR_ZERO) {
848 spe_il(f, src, 0);
849 } else if (dF == PIPE_BLENDFACTOR_ONE) {
850 spe_or(f, src, dst, dst);
851 }
852 } else {
853 spe_fm(f, tmp, dst, dst_factor);
854 spe_fma(f, src, src, src_factor, tmp);
855 }
856 break;
857
858 case PIPE_BLEND_SUBTRACT:
859 if (sF == PIPE_BLENDFACTOR_ONE) {
860 if (dF == PIPE_BLENDFACTOR_ZERO) {
861 /* Do nothing. */
862 } else if (dF == PIPE_BLENDFACTOR_ONE) {
863 spe_fs(f, src, src, dst);
864 }
865 } else if (sF == PIPE_BLENDFACTOR_ZERO) {
866 if (dF == PIPE_BLENDFACTOR_ZERO) {
867 spe_il(f, src, 0);
868 } else if (dF == PIPE_BLENDFACTOR_ONE) {
869 spe_il(f, tmp, 0);
870 spe_fs(f, src, tmp, dst);
871 }
872 } else {
873 spe_fm(f, tmp, dst, dst_factor);
874 spe_fms(f, src, src, src_factor, tmp);
875 }
876 break;
877
878 case PIPE_BLEND_REVERSE_SUBTRACT:
879 if (sF == PIPE_BLENDFACTOR_ONE) {
880 if (dF == PIPE_BLENDFACTOR_ZERO) {
881 spe_il(f, tmp, 0);
882 spe_fs(f, src, tmp, src);
883 } else if (dF == PIPE_BLENDFACTOR_ONE) {
884 spe_fs(f, src, dst, src);
885 }
886 } else if (sF == PIPE_BLENDFACTOR_ZERO) {
887 if (dF == PIPE_BLENDFACTOR_ZERO) {
888 spe_il(f, src, 0);
889 } else if (dF == PIPE_BLENDFACTOR_ONE) {
890 spe_or(f, src, dst, dst);
891 }
892 } else {
893 spe_fm(f, tmp, src, src_factor);
894 spe_fms(f, src, src, dst_factor, tmp);
895 }
896 break;
897
898 case PIPE_BLEND_MIN:
899 spe_cgt(f, tmp, src, dst);
900 spe_selb(f, src, dst, src, tmp);
901 break;
902
903 case PIPE_BLEND_MAX:
904 spe_cgt(f, tmp, src, dst);
905 spe_selb(f, src, src, dst, tmp);
906 break;
907
908 default:
909 assert(0);
910 }
911
912 spe_release_register(f, tmp);
913 }
914
915
916 /**
917 * Generate code to perform alpha blending on the SPE
918 */
919 void
920 cell_generate_alpha_blend(struct cell_blend_state *cb,
921 const struct pipe_blend_color *blend_color)
922 {
923 struct pipe_blend_state *const b = &cb->base;
924 struct spe_function *const f = &cb->code;
925
926 /* This code generates a maximum of 3 (source alpha factor)
927 * + 3 (destination alpha factor) + (3 * 5) (source color factor)
928 * + (3 * 5) (destination color factor) + (4 * 2) (blend equation)
929 * + 4 (fragment mask) + 1 (return) = 49 instlructions. Round up to 64 to
930 * make it a happy power-of-two.
931 */
932 spe_init_func(f, 4 * 64);
933
934
935 const int frag[4] = {
936 spe_allocate_register(f, 3),
937 spe_allocate_register(f, 4),
938 spe_allocate_register(f, 5),
939 spe_allocate_register(f, 6),
940 };
941 const int pixel[4] = {
942 spe_allocate_register(f, 7),
943 spe_allocate_register(f, 8),
944 spe_allocate_register(f, 9),
945 spe_allocate_register(f, 10),
946 };
947 const int mask = spe_allocate_register(f, 11);
948 unsigned func[4];
949 unsigned sF[4];
950 unsigned dF[4];
951 unsigned i;
952 int src_factor[4];
953 int dst_factor[4];
954
955
956 /* Does the selected blend mode make use of the source / destination
957 * color (RGB) blend factors?
958 */
959 boolean need_color_factor = b->blend_enable
960 && (b->rgb_func != PIPE_BLEND_MIN)
961 && (b->rgb_func != PIPE_BLEND_MAX);
962
963 /* Does the selected blend mode make use of the source / destination
964 * alpha blend factors?
965 */
966 boolean need_alpha_factor = b->blend_enable
967 && (b->alpha_func != PIPE_BLEND_MIN)
968 && (b->alpha_func != PIPE_BLEND_MAX);
969
970
971 sF[0] = b->rgb_src_factor;
972 sF[1] = sF[0];
973 sF[2] = sF[0];
974 sF[3] = (b->alpha_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)
975 ? PIPE_BLENDFACTOR_ONE : b->alpha_src_factor;
976
977 dF[0] = b->rgb_dst_factor;
978 dF[1] = dF[0];
979 dF[2] = dF[0];
980 dF[3] = b->rgb_dst_factor;
981
982
983 /* If alpha writing is enabled and the alpha blend mode requires use of
984 * the alpha factor, calculate the alpha factor.
985 */
986 if (((b->colormask & 8) != 0) && need_alpha_factor) {
987 src_factor[3] = emit_alpha_factor_calculation(f, sF[3],
988 blend_color->color[3],
989 frag[3], pixel[3]);
990
991 /* If the alpha destination blend factor is the same as the alpha source
992 * blend factor, re-use the previously calculated value.
993 */
994 dst_factor[3] = (dF[3] == sF[3])
995 ? src_factor[3]
996 : emit_alpha_factor_calculation(f, dF[3],
997 blend_color->color[3],
998 frag[3], pixel[3]);
999 }
1000
1001
1002 if (sF[0] == sF[3]) {
1003 src_factor[0] = src_factor[3];
1004 src_factor[1] = src_factor[3];
1005 src_factor[2] = src_factor[3];
1006 } else if (sF[0] == dF[3]) {
1007 src_factor[0] = dst_factor[3];
1008 src_factor[1] = dst_factor[3];
1009 src_factor[2] = dst_factor[3];
1010 } else if (need_color_factor) {
1011 emit_color_factor_calculation(f,
1012 b->rgb_src_factor,
1013 b->colormask,
1014 blend_color,
1015 frag, pixel, src_factor);
1016 }
1017
1018
1019 if (dF[0] == sF[3]) {
1020 dst_factor[0] = src_factor[3];
1021 dst_factor[1] = src_factor[3];
1022 dst_factor[2] = src_factor[3];
1023 } else if (dF[0] == dF[3]) {
1024 dst_factor[0] = dst_factor[3];
1025 dst_factor[1] = dst_factor[3];
1026 dst_factor[2] = dst_factor[3];
1027 } else if (dF[0] == sF[0]) {
1028 dst_factor[0] = src_factor[0];
1029 dst_factor[1] = src_factor[1];
1030 dst_factor[2] = src_factor[2];
1031 } else if (need_color_factor) {
1032 emit_color_factor_calculation(f,
1033 b->rgb_dst_factor,
1034 b->colormask,
1035 blend_color,
1036 frag, pixel, dst_factor);
1037 }
1038
1039
1040
1041 func[0] = b->rgb_func;
1042 func[1] = func[0];
1043 func[2] = func[0];
1044 func[3] = b->alpha_func;
1045
1046 for (i = 0; i < 4; ++i) {
1047 if ((b->colormask & (1U << i)) != 0) {
1048 emit_blend_calculation(f,
1049 func[i], sF[i], dF[i],
1050 frag[i], src_factor[i],
1051 pixel[i], dst_factor[i]);
1052 spe_selb(f, frag[i], pixel[i], frag[i], mask);
1053 } else {
1054 spe_or(f, frag[i], pixel[i], pixel[i]);
1055 }
1056 }
1057
1058 spe_bi(f, 0, 0, 0);
1059 }