2 * (C) Copyright IBM Corporation 2008
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
27 * Generate code to perform all per-fragment operations.
29 * Code generated by these functions performs alpha, depth, and stencil
30 * testing as well as alpha blending.
33 * Occlusion query is not supported, but this is the right place to add that
34 * support.
36 * \author Ian Romanick <idr@us.ibm.com>
#include <assert.h>

#include "pipe/p_defines.h"
#include "pipe/p_state.h"

#include "cell_context.h"

#include "rtasm/rtasm_ppc_spe.h"
48 * Generate code to perform alpha testing.
50 * The code generated by this function uses the register specificed by
51 * \c mask as both an input and an output.
53 * \param dsa Current alpha-test state
54 * \param f Function to which code should be appended
55 * \param mask Index of register containing active fragment mask
56 * \param alphas Index of register containing per-fragment alpha values
58 * \note Emits a maximum of 6 instructions.
61 emit_alpha_test(struct pipe_depth_stencil_alpha_state
*dsa
,
62 struct spe_function
*f
, int mask
, int alphas
)
64 /* If the alpha function is either NEVER or ALWAYS, there is no need to
65 * load the reference value into a register. ALWAYS is a fairly common
66 * case, and this optimization saves 2 instructions.
68 if (dsa
->alpha
.enabled
69 && (dsa
->alpha
.func
!= PIPE_FUNC_NEVER
)
70 && (dsa
->alpha
.func
!= PIPE_FUNC_ALWAYS
)) {
71 int ref
= spe_allocate_available_register(f
);
72 int tmp_a
= spe_allocate_available_register(f
);
73 int tmp_b
= spe_allocate_available_register(f
);
78 boolean complement
= FALSE
;
80 ref_val
.f
= dsa
->alpha
.ref
;
82 spe_il(f
, ref
, ref_val
.u
& 0x0000ffff);
83 spe_ilh(f
, ref
, ref_val
.u
>> 16);
85 switch (dsa
->alpha
.func
) {
86 case PIPE_FUNC_NOTEQUAL
:
91 spe_fceq(f
, tmp_a
, ref
, alphas
);
94 case PIPE_FUNC_LEQUAL
:
98 case PIPE_FUNC_GREATER
:
99 spe_fcgt(f
, tmp_a
, ref
, alphas
);
106 case PIPE_FUNC_GEQUAL
:
107 spe_fcgt(f
, tmp_a
, ref
, alphas
);
108 spe_fceq(f
, tmp_b
, ref
, alphas
);
109 spe_or(f
, tmp_a
, tmp_b
, tmp_a
);
112 case PIPE_FUNC_ALWAYS
:
113 case PIPE_FUNC_NEVER
:
120 spe_andc(f
, mask
, mask
, tmp_a
);
122 spe_and(f
, mask
, mask
, tmp_a
);
125 spe_release_register(f
, ref
);
126 spe_release_register(f
, tmp_a
);
127 spe_release_register(f
, tmp_b
);
128 } else if (dsa
->alpha
.enabled
&& (dsa
->alpha
.func
== PIPE_FUNC_NEVER
)) {
135 * \param dsa Current depth-test state
136 * \param f Function to which code should be appended
137 * \param m Mask of allocated / free SPE registers
138 * \param mask Index of register to contain depth-pass mask
139 * \param stored Index of register containing values from depth buffer
140 * \param calculated Index of register containing per-fragment depth values
143 * If the calculated depth comparison mask is the actual mask, \c FALSE is
144 * returned. If the calculated depth comparison mask is the compliment of
145 * the actual mask, \c TRUE is returned.
147 * \note Emits a maximum of 3 instructions.
150 emit_depth_test(struct pipe_depth_stencil_alpha_state
*dsa
,
151 struct spe_function
*f
, int mask
, int stored
, int calculated
)
153 unsigned func
= (dsa
->depth
.enabled
)
154 ? dsa
->depth
.func
: PIPE_FUNC_ALWAYS
;
155 int tmp
= spe_allocate_available_register(f
);
156 boolean compliment
= FALSE
;
159 case PIPE_FUNC_NEVER
:
163 case PIPE_FUNC_NOTEQUAL
:
166 case PIPE_FUNC_EQUAL
:
167 spe_ceq(f
, mask
, calculated
, stored
);
170 case PIPE_FUNC_LEQUAL
:
173 case PIPE_FUNC_GREATER
:
174 spe_clgt(f
, mask
, calculated
, stored
);
180 case PIPE_FUNC_GEQUAL
:
181 spe_clgt(f
, mask
, calculated
, stored
);
182 spe_ceq(f
, tmp
, calculated
, stored
);
183 spe_or(f
, mask
, mask
, tmp
);
186 case PIPE_FUNC_ALWAYS
:
195 spe_release_register(f
, tmp
);
201 * \note Emits a maximum of 5 instructions.
204 emit_stencil_op(struct spe_function
*f
,
205 int out
, int in
, int mask
, unsigned op
, unsigned ref
)
207 const int clamp
= spe_allocate_available_register(f
);
208 const int tmp
= spe_allocate_available_register(f
);
211 case PIPE_STENCIL_OP_KEEP
:
213 case PIPE_STENCIL_OP_ZERO
:
216 case PIPE_STENCIL_OP_REPLACE
:
219 case PIPE_STENCIL_OP_INCR
:
220 spe_il(f
, clamp
, 0x0ff);
221 spe_ai(f
, out
, in
, 1);
222 spe_cgti(f
, tmp
, out
, clamp
);
223 spe_selb(f
, out
, out
, clamp
, tmp
);
225 case PIPE_STENCIL_OP_DECR
:
227 spe_ai(f
, out
, in
, -1);
228 spe_cgti(f
, tmp
, out
, clamp
);
229 spe_selb(f
, out
, clamp
, out
, tmp
);
231 case PIPE_STENCIL_OP_INCR_WRAP
:
232 spe_ai(f
, out
, in
, 1);
234 case PIPE_STENCIL_OP_DECR_WRAP
:
235 spe_ai(f
, out
, in
, -1);
237 case PIPE_STENCIL_OP_INVERT
:
238 spe_nor(f
, out
, in
, in
);
244 spe_release_register(f
, tmp
);
245 spe_release_register(f
, clamp
);
247 spe_selb(f
, out
, in
, out
, mask
);
252 * \param dsa Depth / stencil test state
253 * \param face 0 for front face, 1 for back face
254 * \param f Function to append instructions to
255 * \param reg_mask Mask of allocated registers
256 * \param mask Register containing mask of fragments passing the
258 * \param depth_mask Register containing mask of fragments passing the
260 * \param depth_compliment Is \c depth_mask the compliment of the actual mask?
261 * \param stencil Register containing values from stencil buffer
262 * \param depth_pass Register to store mask of fragments passing stencil test
266 * Emits a maximum of 10 + (3 * 5) = 25 instructions.
269 emit_stencil_test(struct pipe_depth_stencil_alpha_state
*dsa
,
271 struct spe_function
*f
,
274 boolean depth_complement
,
278 int stencil_fail
= spe_allocate_available_register(f
);
279 int depth_fail
= spe_allocate_available_register(f
);
280 int stencil_mask
= spe_allocate_available_register(f
);
281 int stencil_pass
= spe_allocate_available_register(f
);
282 int face_stencil
= spe_allocate_available_register(f
);
283 int stencil_src
= stencil
;
284 const unsigned ref
= (dsa
->stencil
[face
].ref_value
285 & dsa
->stencil
[face
].value_mask
);
286 boolean complement
= FALSE
;
287 int stored
= spe_allocate_available_register(f
);
288 int tmp
= spe_allocate_available_register(f
);
291 if ((dsa
->stencil
[face
].func
!= PIPE_FUNC_NEVER
)
292 && (dsa
->stencil
[face
].func
!= PIPE_FUNC_ALWAYS
)
293 && (dsa
->stencil
[face
].value_mask
!= 0x0ff)) {
294 spe_andi(f
, stored
, stencil
, dsa
->stencil
[face
].value_mask
);
298 switch (dsa
->stencil
[face
].func
) {
299 case PIPE_FUNC_NEVER
:
300 spe_il(f
, stencil_mask
, 0);
303 case PIPE_FUNC_NOTEQUAL
:
306 case PIPE_FUNC_EQUAL
:
307 spe_ceqi(f
, stencil_mask
, stored
, ref
);
310 case PIPE_FUNC_LEQUAL
:
313 case PIPE_FUNC_GREATER
:
314 spe_clgti(f
, stencil_mask
, stored
, ref
);
320 case PIPE_FUNC_GEQUAL
:
321 spe_clgti(f
, stencil_mask
, stored
, ref
);
322 spe_ceqi(f
, tmp
, stored
, ref
);
323 spe_or(f
, stencil_mask
, stencil_mask
, tmp
);
326 case PIPE_FUNC_ALWAYS
:
327 /* See comment below. */
335 spe_release_register(f
, stored
);
336 spe_release_register(f
, tmp
);
339 /* ALWAYS is a very common stencil-test, so some effort is applied to
340 * optimize that case. The stencil-pass mask is the same as the input
341 * fragment mask. This makes the stencil-test (above) a no-op, and the
342 * input fragment mask can be "renamed" the stencil-pass mask.
344 if (dsa
->stencil
[face
].func
== PIPE_FUNC_ALWAYS
) {
345 spe_release_register(f
, stencil_pass
);
349 spe_andc(f
, stencil_pass
, mask
, stencil_mask
);
351 spe_and(f
, stencil_pass
, mask
, stencil_mask
);
355 if (depth_complement
) {
356 spe_andc(f
, depth_pass
, stencil_pass
, depth_mask
);
358 spe_and(f
, depth_pass
, stencil_pass
, depth_mask
);
362 /* Conditionally emit code to update the stencil value under various
363 * condititons. Note that there is no need to generate code under the
364 * following circumstances:
366 * - Stencil write mask is zero.
367 * - For stencil-fail if the stencil test is ALWAYS
368 * - For depth-fail if the stencil test is NEVER
369 * - For depth-pass if the stencil test is NEVER
370 * - Any of the 3 conditions if the operation is KEEP
372 if (dsa
->stencil
[face
].write_mask
!= 0) {
373 if ((dsa
->stencil
[face
].func
!= PIPE_FUNC_ALWAYS
)
374 && (dsa
->stencil
[face
].fail_op
!= PIPE_STENCIL_OP_KEEP
)) {
376 spe_and(f
, stencil_fail
, mask
, stencil_mask
);
378 spe_andc(f
, stencil_fail
, mask
, stencil_mask
);
381 emit_stencil_op(f
, face_stencil
, stencil_src
, stencil_fail
,
382 dsa
->stencil
[face
].fail_op
,
383 dsa
->stencil
[face
].ref_value
);
385 stencil_src
= face_stencil
;
388 if ((dsa
->stencil
[face
].func
!= PIPE_FUNC_NEVER
)
389 && (dsa
->stencil
[face
].zfail_op
!= PIPE_STENCIL_OP_KEEP
)) {
390 if (depth_complement
) {
391 spe_and(f
, depth_fail
, stencil_pass
, depth_mask
);
393 spe_andc(f
, depth_fail
, stencil_pass
, depth_mask
);
396 emit_stencil_op(f
, face_stencil
, stencil_src
, depth_fail
,
397 dsa
->stencil
[face
].zfail_op
,
398 dsa
->stencil
[face
].ref_value
);
399 stencil_src
= face_stencil
;
402 if ((dsa
->stencil
[face
].func
!= PIPE_FUNC_NEVER
)
403 && (dsa
->stencil
[face
].zpass_op
!= PIPE_STENCIL_OP_KEEP
)) {
404 emit_stencil_op(f
, face_stencil
, stencil_src
, depth_pass
,
405 dsa
->stencil
[face
].zpass_op
,
406 dsa
->stencil
[face
].ref_value
);
407 stencil_src
= face_stencil
;
411 spe_release_register(f
, stencil_fail
);
412 spe_release_register(f
, depth_fail
);
413 spe_release_register(f
, stencil_mask
);
414 if (stencil_pass
!= mask
) {
415 spe_release_register(f
, stencil_pass
);
418 /* If all of the stencil operations were KEEP or the stencil write mask was
419 * zero, "stencil_src" will still be set to "stencil". In this case
420 * release the "face_stencil" register. Otherwise apply the stencil write
421 * mask to select bits from the calculated stencil value and the previous
424 if (stencil_src
== stencil
) {
425 spe_release_register(f
, face_stencil
);
426 } else if (dsa
->stencil
[face
].write_mask
!= 0x0ff) {
427 int tmp
= spe_allocate_available_register(f
);
429 spe_il(f
, tmp
, dsa
->stencil
[face
].write_mask
);
430 spe_selb(f
, stencil_src
, stencil
, stencil_src
, tmp
);
432 spe_release_register(f
, tmp
);
440 cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state
*cdsa
)
442 struct pipe_depth_stencil_alpha_state
*const dsa
= &cdsa
->base
;
443 struct spe_function
*const f
= &cdsa
->code
;
445 /* This code generates a maximum of 6 (alpha test) + 3 (depth test)
446 * + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions. Round
447 * up to 64 to make it a happy power-of-two.
449 spe_init_func(f
, 4 * 64);
452 /* Allocate registers for the function's input parameters. Cleverly (and
453 * clever code is usually dangerous, but I couldn't resist) the generated
454 * function returns a structure. Returned structures start with register
455 * 3, and the structure fields are ordered to match up exactly with the
458 int mask
= spe_allocate_register(f
, 3);
459 int depth
= spe_allocate_register(f
, 4);
460 int stencil
= spe_allocate_register(f
, 5);
461 int zvals
= spe_allocate_register(f
, 6);
462 int frag_a
= spe_allocate_register(f
, 7);
463 int facing
= spe_allocate_register(f
, 8);
465 int depth_mask
= spe_allocate_available_register(f
);
467 boolean depth_complement
;
470 emit_alpha_test(dsa
, f
, mask
, frag_a
);
472 depth_complement
= emit_depth_test(dsa
, f
, depth_mask
, depth
, zvals
);
474 if (dsa
->stencil
[0].enabled
) {
475 const int front_depth_pass
= spe_allocate_available_register(f
);
476 int front_stencil
= emit_stencil_test(dsa
, 0, f
, mask
,
477 depth_mask
, depth_complement
,
478 stencil
, front_depth_pass
);
480 if (dsa
->stencil
[1].enabled
) {
481 const int back_depth_pass
= spe_allocate_available_register(f
);
482 int back_stencil
= emit_stencil_test(dsa
, 1, f
, mask
,
483 depth_mask
, depth_complement
,
484 stencil
, back_depth_pass
);
486 /* If the front facing stencil value and the back facing stencil
487 * value are stored in the same register, there is no need to select
488 * a value based on the facing. This can happen if the stencil value
489 * was not modified due to the write masks being zero, the stencil
490 * operations being KEEP, etc.
492 if (front_stencil
!= back_stencil
) {
493 spe_selb(f
, stencil
, back_stencil
, front_stencil
, facing
);
496 if (back_stencil
!= stencil
) {
497 spe_release_register(f
, back_stencil
);
500 if (front_stencil
!= stencil
) {
501 spe_release_register(f
, front_stencil
);
504 spe_selb(f
, mask
, back_depth_pass
, front_depth_pass
, facing
);
506 spe_release_register(f
, back_depth_pass
);
508 if (front_stencil
!= stencil
) {
509 spe_or(f
, stencil
, front_stencil
, front_stencil
);
510 spe_release_register(f
, front_stencil
);
514 spe_release_register(f
, front_depth_pass
);
515 } else if (dsa
->depth
.enabled
) {
516 if (depth_complement
) {
517 spe_andc(f
, mask
, mask
, depth_mask
);
519 spe_and(f
, mask
, mask
, depth_mask
);
523 if (dsa
->depth
.writemask
) {
524 spe_selb(f
, depth
, depth
, zvals
, mask
);
532 const uint32_t *p
= f
->store
;
535 printf("# alpha (%sabled)\n",
536 (dsa
->alpha
.enabled
) ? "en" : "dis");
537 printf("# func: %u\n", dsa
->alpha
.func
);
538 printf("# ref: %.2f\n", dsa
->alpha
.ref
);
540 printf("# depth (%sabled)\n",
541 (dsa
->depth
.enabled
) ? "en" : "dis");
542 printf("# func: %u\n", dsa
->depth
.func
);
544 for (i
= 0; i
< 2; i
++) {
545 printf("# %s stencil (%sabled)\n",
546 (i
== 0) ? "front" : "back",
547 (dsa
->stencil
[i
].enabled
) ? "en" : "dis");
549 printf("# func: %u\n", dsa
->stencil
[i
].func
);
550 printf("# op (sf, zf, zp): %u %u %u\n",
551 dsa
->stencil
[i
].fail_op
,
552 dsa
->stencil
[i
].zfail_op
,
553 dsa
->stencil
[i
].zpass_op
);
554 printf("# ref value / value mask / write mask: %02x %02x %02x\n",
555 dsa
->stencil
[i
].ref_value
,
556 dsa
->stencil
[i
].value_mask
,
557 dsa
->stencil
[i
].write_mask
);
561 for (/* empty */; p
< f
->csr
; p
++) {
562 printf("\t.long\t0x%04x\n", *p
);
571 * \note Emits a maximum of 3 instructions
574 emit_alpha_factor_calculation(struct spe_function
*f
,
575 unsigned factor
, float const_alpha
,
576 int src_alpha
, int dst_alpha
)
586 alpha
.f
= const_alpha
;
589 case PIPE_BLENDFACTOR_ONE
:
593 case PIPE_BLENDFACTOR_SRC_ALPHA
:
594 factor_reg
= spe_allocate_available_register(f
);
596 spe_or(f
, factor_reg
, src_alpha
, src_alpha
);
599 case PIPE_BLENDFACTOR_DST_ALPHA
:
600 factor_reg
= dst_alpha
;
603 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE
:
607 case PIPE_BLENDFACTOR_INV_CONST_ALPHA
:
608 const_alpha
= 1.0 - const_alpha
;
610 case PIPE_BLENDFACTOR_CONST_ALPHA
:
611 factor_reg
= spe_allocate_available_register(f
);
613 spe_il(f
, factor_reg
, alpha
.u
& 0x0ffff);
614 spe_ilh(f
, factor_reg
, alpha
.u
>> 16);
617 case PIPE_BLENDFACTOR_ZERO
:
621 case PIPE_BLENDFACTOR_INV_SRC_ALPHA
:
622 tmp
= spe_allocate_available_register(f
);
623 factor_reg
= spe_allocate_available_register(f
);
626 spe_cuflt(f
, tmp
, tmp
, 0);
627 spe_fs(f
, factor_reg
, tmp
, src_alpha
);
629 spe_release_register(f
, tmp
);
632 case PIPE_BLENDFACTOR_INV_DST_ALPHA
:
633 tmp
= spe_allocate_available_register(f
);
634 factor_reg
= spe_allocate_available_register(f
);
637 spe_cuflt(f
, tmp
, tmp
, 0);
638 spe_fs(f
, factor_reg
, tmp
, dst_alpha
);
640 spe_release_register(f
, tmp
);
643 case PIPE_BLENDFACTOR_SRC1_ALPHA
:
644 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA
:
656 * \note Emits a maximum of 5 instructions
659 emit_color_factor_calculation(struct spe_function
*f
,
660 unsigned sF
, unsigned mask
,
661 const struct pipe_blend_color
*blend_color
,
674 color
.f
[0] = blend_color
->color
[0];
675 color
.f
[1] = blend_color
->color
[1];
676 color
.f
[2] = blend_color
->color
[2];
677 color
.f
[3] = blend_color
->color
[3];
685 case PIPE_BLENDFACTOR_ONE
:
688 case PIPE_BLENDFACTOR_SRC_COLOR
:
689 for (i
= 0; i
< 3; ++i
) {
690 if ((mask
& (1U << i
)) != 0) {
691 factor
[i
] = spe_allocate_available_register(f
);
692 spe_or(f
, factor
[i
], src
[i
], src
[i
]);
697 case PIPE_BLENDFACTOR_SRC_ALPHA
:
698 factor
[0] = spe_allocate_available_register(f
);
699 factor
[1] = factor
[0];
700 factor
[2] = factor
[0];
702 spe_or(f
, factor
[0], src
[3], src
[3]);
705 case PIPE_BLENDFACTOR_DST_ALPHA
:
711 case PIPE_BLENDFACTOR_DST_COLOR
:
717 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE
:
718 tmp
= spe_allocate_available_register(f
);
719 factor
[0] = spe_allocate_available_register(f
);
720 factor
[1] = factor
[0];
721 factor
[2] = factor
[0];
723 /* Alpha saturate means min(As, 1-Ad).
726 spe_cuflt(f
, tmp
, tmp
, 0);
727 spe_fs(f
, tmp
, tmp
, dst
[3]);
728 spe_fcgt(f
, factor
[0], tmp
, src
[3]);
729 spe_selb(f
, factor
[0], src
[3], tmp
, factor
[0]);
731 spe_release_register(f
, tmp
);
734 case PIPE_BLENDFACTOR_INV_CONST_COLOR
:
735 color
.f
[0] = 1.0 - color
.f
[0];
736 color
.f
[1] = 1.0 - color
.f
[1];
737 color
.f
[2] = 1.0 - color
.f
[2];
739 case PIPE_BLENDFACTOR_CONST_COLOR
:
740 for (i
= 0; i
< 3; i
++) {
741 factor
[i
] = spe_allocate_available_register(f
);
743 spe_il(f
, factor
[i
], color
.u
[i
] & 0x0ffff);
744 spe_ilh(f
, factor
[i
], color
.u
[i
] >> 16);
748 case PIPE_BLENDFACTOR_INV_CONST_ALPHA
:
749 color
.f
[3] = 1.0 - color
.f
[3];
751 case PIPE_BLENDFACTOR_CONST_ALPHA
:
752 factor
[0] = spe_allocate_available_register(f
);
753 factor
[1] = factor
[0];
754 factor
[2] = factor
[0];
756 spe_il(f
, factor
[0], color
.u
[3] & 0x0ffff);
757 spe_ilh(f
, factor
[0], color
.u
[3] >> 16);
760 case PIPE_BLENDFACTOR_ZERO
:
763 case PIPE_BLENDFACTOR_INV_SRC_COLOR
:
764 tmp
= spe_allocate_available_register(f
);
767 spe_cuflt(f
, tmp
, tmp
, 0);
769 for (i
= 0; i
< 3; ++i
) {
770 if ((mask
& (1U << i
)) != 0) {
771 factor
[i
] = spe_allocate_available_register(f
);
772 spe_fs(f
, factor
[i
], tmp
, src
[i
]);
776 spe_release_register(f
, tmp
);
779 case PIPE_BLENDFACTOR_INV_SRC_ALPHA
:
780 tmp
= spe_allocate_available_register(f
);
781 factor
[0] = spe_allocate_available_register(f
);
782 factor
[1] = factor
[0];
783 factor
[2] = factor
[0];
786 spe_cuflt(f
, tmp
, tmp
, 0);
787 spe_fs(f
, factor
[0], tmp
, src
[3]);
789 spe_release_register(f
, tmp
);
792 case PIPE_BLENDFACTOR_INV_DST_ALPHA
:
793 tmp
= spe_allocate_available_register(f
);
794 factor
[0] = spe_allocate_available_register(f
);
795 factor
[1] = factor
[0];
796 factor
[2] = factor
[0];
799 spe_cuflt(f
, tmp
, tmp
, 0);
800 spe_fs(f
, factor
[0], tmp
, dst
[3]);
802 spe_release_register(f
, tmp
);
805 case PIPE_BLENDFACTOR_INV_DST_COLOR
:
806 tmp
= spe_allocate_available_register(f
);
809 spe_cuflt(f
, tmp
, tmp
, 0);
811 for (i
= 0; i
< 3; ++i
) {
812 if ((mask
& (1U << i
)) != 0) {
813 factor
[i
] = spe_allocate_available_register(f
);
814 spe_fs(f
, factor
[i
], tmp
, dst
[i
]);
818 spe_release_register(f
, tmp
);
821 case PIPE_BLENDFACTOR_SRC1_COLOR
:
822 case PIPE_BLENDFACTOR_SRC1_ALPHA
:
823 case PIPE_BLENDFACTOR_INV_SRC1_COLOR
:
824 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA
:
832 emit_blend_calculation(struct spe_function
*f
,
833 unsigned func
, unsigned sF
, unsigned dF
,
834 int src
, int src_factor
, int dst
, int dst_factor
)
836 int tmp
= spe_allocate_available_register(f
);
840 if (sF
== PIPE_BLENDFACTOR_ONE
) {
841 if (dF
== PIPE_BLENDFACTOR_ZERO
) {
843 } else if (dF
== PIPE_BLENDFACTOR_ONE
) {
844 spe_fa(f
, src
, src
, dst
);
846 } else if (sF
== PIPE_BLENDFACTOR_ZERO
) {
847 if (dF
== PIPE_BLENDFACTOR_ZERO
) {
849 } else if (dF
== PIPE_BLENDFACTOR_ONE
) {
850 spe_or(f
, src
, dst
, dst
);
853 spe_fm(f
, tmp
, dst
, dst_factor
);
854 spe_fma(f
, src
, src
, src_factor
, tmp
);
858 case PIPE_BLEND_SUBTRACT
:
859 if (sF
== PIPE_BLENDFACTOR_ONE
) {
860 if (dF
== PIPE_BLENDFACTOR_ZERO
) {
862 } else if (dF
== PIPE_BLENDFACTOR_ONE
) {
863 spe_fs(f
, src
, src
, dst
);
865 } else if (sF
== PIPE_BLENDFACTOR_ZERO
) {
866 if (dF
== PIPE_BLENDFACTOR_ZERO
) {
868 } else if (dF
== PIPE_BLENDFACTOR_ONE
) {
870 spe_fs(f
, src
, tmp
, dst
);
873 spe_fm(f
, tmp
, dst
, dst_factor
);
874 spe_fms(f
, src
, src
, src_factor
, tmp
);
878 case PIPE_BLEND_REVERSE_SUBTRACT
:
879 if (sF
== PIPE_BLENDFACTOR_ONE
) {
880 if (dF
== PIPE_BLENDFACTOR_ZERO
) {
882 spe_fs(f
, src
, tmp
, src
);
883 } else if (dF
== PIPE_BLENDFACTOR_ONE
) {
884 spe_fs(f
, src
, dst
, src
);
886 } else if (sF
== PIPE_BLENDFACTOR_ZERO
) {
887 if (dF
== PIPE_BLENDFACTOR_ZERO
) {
889 } else if (dF
== PIPE_BLENDFACTOR_ONE
) {
890 spe_or(f
, src
, dst
, dst
);
893 spe_fm(f
, tmp
, src
, src_factor
);
894 spe_fms(f
, src
, src
, dst_factor
, tmp
);
899 spe_cgt(f
, tmp
, src
, dst
);
900 spe_selb(f
, src
, dst
, src
, tmp
);
904 spe_cgt(f
, tmp
, src
, dst
);
905 spe_selb(f
, src
, src
, dst
, tmp
);
912 spe_release_register(f
, tmp
);
917 * Generate code to perform alpha blending on the SPE
920 cell_generate_alpha_blend(struct cell_blend_state
*cb
,
921 const struct pipe_blend_color
*blend_color
)
923 struct pipe_blend_state
*const b
= &cb
->base
;
924 struct spe_function
*const f
= &cb
->code
;
926 /* This code generates a maximum of 3 (source alpha factor)
927 * + 3 (destination alpha factor) + (3 * 5) (source color factor)
928 * + (3 * 5) (destination color factor) + (4 * 2) (blend equation)
929 * + 4 (fragment mask) + 1 (return) = 49 instructions. Round up to 64 to
930 * make it a happy power-of-two.
932 spe_init_func(f
, 4 * 64);
935 const int frag
[4] = {
936 spe_allocate_register(f
, 3),
937 spe_allocate_register(f
, 4),
938 spe_allocate_register(f
, 5),
939 spe_allocate_register(f
, 6),
941 const int pixel
[4] = {
942 spe_allocate_register(f
, 7),
943 spe_allocate_register(f
, 8),
944 spe_allocate_register(f
, 9),
945 spe_allocate_register(f
, 10),
947 const int mask
= spe_allocate_register(f
, 11);
956 /* Does the selected blend mode make use of the source / destination
957 * color (RGB) blend factors?
959 boolean need_color_factor
= b
->blend_enable
960 && (b
->rgb_func
!= PIPE_BLEND_MIN
)
961 && (b
->rgb_func
!= PIPE_BLEND_MAX
);
963 /* Does the selected blend mode make use of the source / destination
964 * alpha blend factors?
966 boolean need_alpha_factor
= b
->blend_enable
967 && (b
->alpha_func
!= PIPE_BLEND_MIN
)
968 && (b
->alpha_func
!= PIPE_BLEND_MAX
);
971 sF
[0] = b
->rgb_src_factor
;
974 sF
[3] = (b
->alpha_src_factor
== PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE
)
975 ? PIPE_BLENDFACTOR_ONE
: b
->alpha_src_factor
;
977 dF
[0] = b
->rgb_dst_factor
;
980 dF
[3] = b
->rgb_dst_factor
;
983 /* If alpha writing is enabled and the alpha blend mode requires use of
984 * the alpha factor, calculate the alpha factor.
986 if (((b
->colormask
& 8) != 0) && need_alpha_factor
) {
987 src_factor
[3] = emit_alpha_factor_calculation(f
, sF
[3],
988 blend_color
->color
[3],
991 /* If the alpha destination blend factor is the same as the alpha source
992 * blend factor, re-use the previously calculated value.
994 dst_factor
[3] = (dF
[3] == sF
[3])
996 : emit_alpha_factor_calculation(f
, dF
[3],
997 blend_color
->color
[3],
1002 if (sF
[0] == sF
[3]) {
1003 src_factor
[0] = src_factor
[3];
1004 src_factor
[1] = src_factor
[3];
1005 src_factor
[2] = src_factor
[3];
1006 } else if (sF
[0] == dF
[3]) {
1007 src_factor
[0] = dst_factor
[3];
1008 src_factor
[1] = dst_factor
[3];
1009 src_factor
[2] = dst_factor
[3];
1010 } else if (need_color_factor
) {
1011 emit_color_factor_calculation(f
,
1015 frag
, pixel
, src_factor
);
1019 if (dF
[0] == sF
[3]) {
1020 dst_factor
[0] = src_factor
[3];
1021 dst_factor
[1] = src_factor
[3];
1022 dst_factor
[2] = src_factor
[3];
1023 } else if (dF
[0] == dF
[3]) {
1024 dst_factor
[0] = dst_factor
[3];
1025 dst_factor
[1] = dst_factor
[3];
1026 dst_factor
[2] = dst_factor
[3];
1027 } else if (dF
[0] == sF
[0]) {
1028 dst_factor
[0] = src_factor
[0];
1029 dst_factor
[1] = src_factor
[1];
1030 dst_factor
[2] = src_factor
[2];
1031 } else if (need_color_factor
) {
1032 emit_color_factor_calculation(f
,
1036 frag
, pixel
, dst_factor
);
1041 func
[0] = b
->rgb_func
;
1044 func
[3] = b
->alpha_func
;
1046 for (i
= 0; i
< 4; ++i
) {
1047 if ((b
->colormask
& (1U << i
)) != 0) {
1048 emit_blend_calculation(f
,
1049 func
[i
], sF
[i
], dF
[i
],
1050 frag
[i
], src_factor
[i
],
1051 pixel
[i
], dst_factor
[i
]);
1052 spe_selb(f
, frag
[i
], pixel
[i
], frag
[i
], mask
);
1054 spe_or(f
, frag
[i
], pixel
[i
], pixel
[i
]);