2 * (C) Copyright IBM Corporation 2008
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
27 * Generate code to perform all per-fragment operations.
29 * Code generated by these functions performs alpha, depth, and stencil
30 * testing as well as alpha blending.
33 * Occlusion query is not supported, but this is the right place to add that
36 * \author Ian Romanick <idr@us.ibm.com>
39 #include "pipe/p_defines.h"
40 #include "pipe/p_state.h"
42 #include "cell_context.h"
44 #include "rtasm/rtasm_ppc_spe.h"
48 * Generate code to perform alpha testing.
50 * The code generated by this function uses the register specificed by
51 * \c mask as both an input and an output.
53 * \param dsa Current alpha-test state
54 * \param f Function to which code should be appended
55 * \param mask Index of register containing active fragment mask
56 * \param alphas Index of register containing per-fragment alpha values
58 * \note Emits a maximum of 6 instructions.
61 emit_alpha_test(struct pipe_depth_stencil_alpha_state
*dsa
,
62 struct spe_function
*f
, int mask
, int alphas
)
64 /* If the alpha function is either NEVER or ALWAYS, there is no need to
65 * load the reference value into a register. ALWAYS is a fairly common
66 * case, and this optimization saves 2 instructions.
68 if (dsa
->alpha
.enabled
69 && (dsa
->alpha
.func
!= PIPE_FUNC_NEVER
)
70 && (dsa
->alpha
.func
!= PIPE_FUNC_ALWAYS
)) {
71 int ref
= spe_allocate_available_register(f
);
72 int tmp_a
= spe_allocate_available_register(f
);
73 int tmp_b
= spe_allocate_available_register(f
);
78 boolean complement
= FALSE
;
80 ref_val
.f
= dsa
->alpha
.ref
;
82 spe_il(f
, ref
, ref_val
.u
& 0x0000ffff);
83 spe_ilh(f
, ref
, ref_val
.u
>> 16);
85 switch (dsa
->alpha
.func
) {
86 case PIPE_FUNC_NOTEQUAL
:
91 spe_fceq(f
, tmp_a
, ref
, alphas
);
94 case PIPE_FUNC_LEQUAL
:
98 case PIPE_FUNC_GREATER
:
99 spe_fcgt(f
, tmp_a
, ref
, alphas
);
106 case PIPE_FUNC_GEQUAL
:
107 spe_fcgt(f
, tmp_a
, ref
, alphas
);
108 spe_fceq(f
, tmp_b
, ref
, alphas
);
109 spe_or(f
, tmp_a
, tmp_b
, tmp_a
);
112 case PIPE_FUNC_ALWAYS
:
113 case PIPE_FUNC_NEVER
:
120 spe_andc(f
, mask
, mask
, tmp_a
);
122 spe_and(f
, mask
, mask
, tmp_a
);
125 spe_release_register(f
, ref
);
126 spe_release_register(f
, tmp_a
);
127 spe_release_register(f
, tmp_b
);
128 } else if (dsa
->alpha
.enabled
&& (dsa
->alpha
.func
== PIPE_FUNC_NEVER
)) {
135 * \param dsa Current depth-test state
136 * \param f Function to which code should be appended
137 * \param m Mask of allocated / free SPE registers
138 * \param mask Index of register to contain depth-pass mask
139 * \param stored Index of register containing values from depth buffer
140 * \param calculated Index of register containing per-fragment depth values
143 * If the calculated depth comparison mask is the actual mask, \c FALSE is
144 * returned. If the calculated depth comparison mask is the complement of
145 * the actual mask, \c TRUE is returned.
147 * \note Emits a maximum of 3 instructions.
150 emit_depth_test(struct pipe_depth_stencil_alpha_state
*dsa
,
151 struct spe_function
*f
, int mask
, int stored
, int calculated
)
153 unsigned func
= (dsa
->depth
.enabled
)
154 ? dsa
->depth
.func
: PIPE_FUNC_ALWAYS
;
155 int tmp
= spe_allocate_available_register(f
);
156 boolean compliment
= FALSE
;
159 case PIPE_FUNC_NEVER
:
163 case PIPE_FUNC_NOTEQUAL
:
166 case PIPE_FUNC_EQUAL
:
167 spe_ceq(f
, mask
, calculated
, stored
);
170 case PIPE_FUNC_LEQUAL
:
173 case PIPE_FUNC_GREATER
:
174 spe_clgt(f
, mask
, calculated
, stored
);
180 case PIPE_FUNC_GEQUAL
:
181 spe_clgt(f
, mask
, calculated
, stored
);
182 spe_ceq(f
, tmp
, calculated
, stored
);
183 spe_or(f
, mask
, mask
, tmp
);
186 case PIPE_FUNC_ALWAYS
:
195 spe_release_register(f
, tmp
);
201 * \note Emits a maximum of 5 instructions.
204 * Since \c out and \c in might be the same register, this routine cannot
205 * generate code that uses \c out as a temporary.
208 emit_stencil_op(struct spe_function
*f
,
209 int out
, int in
, int mask
, unsigned op
, unsigned ref
)
211 const int clamp
= spe_allocate_available_register(f
);
212 const int clamp_mask
= spe_allocate_available_register(f
);
213 const int result
= spe_allocate_available_register(f
);
216 case PIPE_STENCIL_OP_KEEP
:
218 case PIPE_STENCIL_OP_ZERO
:
219 spe_il(f
, result
, 0);
221 case PIPE_STENCIL_OP_REPLACE
:
222 spe_il(f
, result
, ref
);
224 case PIPE_STENCIL_OP_INCR
:
225 spe_il(f
, clamp
, 0x0ff);
226 spe_ai(f
, result
, in
, 1);
227 spe_clgti(f
, clamp_mask
, result
, 0x0ff);
228 spe_selb(f
, result
, result
, clamp
, clamp_mask
);
230 case PIPE_STENCIL_OP_DECR
:
232 spe_ai(f
, result
, in
, -1);
234 /* If "(s-1) < 0" in signed arithmetic, then "(s-1) > MAX" in unsigned
237 spe_clgti(f
, clamp_mask
, result
, 0x0ff);
238 spe_selb(f
, result
, result
, clamp
, clamp_mask
);
240 case PIPE_STENCIL_OP_INCR_WRAP
:
241 spe_ai(f
, result
, in
, 1);
243 case PIPE_STENCIL_OP_DECR_WRAP
:
244 spe_ai(f
, result
, in
, -1);
246 case PIPE_STENCIL_OP_INVERT
:
247 spe_nor(f
, result
, in
, in
);
253 spe_selb(f
, out
, in
, result
, mask
);
255 spe_release_register(f
, result
);
256 spe_release_register(f
, clamp_mask
);
257 spe_release_register(f
, clamp
);
262 * \param dsa Depth / stencil test state
263 * \param face 0 for front face, 1 for back face
264 * \param f Function to append instructions to
265 * \param reg_mask Mask of allocated registers
266 * \param mask Register containing mask of fragments passing the
268 * \param depth_mask Register containing mask of fragments passing the
270 * \param depth_complement Is \c depth_mask the complement of the actual mask?
271 * \param stencil Register containing values from stencil buffer
272 * \param depth_pass Register to store mask of fragments passing stencil test
276 * Emits a maximum of 10 + (3 * 5) = 25 instructions.
279 emit_stencil_test(struct pipe_depth_stencil_alpha_state
*dsa
,
281 struct spe_function
*f
,
284 boolean depth_complement
,
288 int stencil_fail
= spe_allocate_available_register(f
);
289 int depth_fail
= spe_allocate_available_register(f
);
290 int stencil_mask
= spe_allocate_available_register(f
);
291 int stencil_pass
= spe_allocate_available_register(f
);
292 int face_stencil
= spe_allocate_available_register(f
);
293 int stencil_src
= stencil
;
294 const unsigned ref
= (dsa
->stencil
[face
].ref_value
295 & dsa
->stencil
[face
].value_mask
);
296 boolean complement
= FALSE
;
298 int tmp
= spe_allocate_available_register(f
);
301 if ((dsa
->stencil
[face
].func
!= PIPE_FUNC_NEVER
)
302 && (dsa
->stencil
[face
].func
!= PIPE_FUNC_ALWAYS
)
303 && (dsa
->stencil
[face
].value_mask
!= 0x0ff)) {
304 stored
= spe_allocate_available_register(f
);
305 spe_andi(f
, stored
, stencil
, dsa
->stencil
[face
].value_mask
);
311 switch (dsa
->stencil
[face
].func
) {
312 case PIPE_FUNC_NEVER
:
313 spe_il(f
, stencil_mask
, 0);
316 case PIPE_FUNC_NOTEQUAL
:
319 case PIPE_FUNC_EQUAL
:
320 spe_ceqi(f
, stencil_mask
, stored
, ref
);
323 case PIPE_FUNC_LEQUAL
:
326 case PIPE_FUNC_GREATER
:
327 spe_clgti(f
, stencil_mask
, stored
, ref
);
333 case PIPE_FUNC_GEQUAL
:
334 spe_clgti(f
, stencil_mask
, stored
, ref
);
335 spe_ceqi(f
, tmp
, stored
, ref
);
336 spe_or(f
, stencil_mask
, stencil_mask
, tmp
);
339 case PIPE_FUNC_ALWAYS
:
340 /* See comment below. */
348 if (stored
!= stencil
) {
349 spe_release_register(f
, stored
);
351 spe_release_register(f
, tmp
);
354 /* ALWAYS is a very common stencil-test, so some effort is applied to
355 * optimize that case. The stencil-pass mask is the same as the input
356 * fragment mask. This makes the stencil-test (above) a no-op, and the
357 * input fragment mask can be "renamed" the stencil-pass mask.
359 if (dsa
->stencil
[face
].func
== PIPE_FUNC_ALWAYS
) {
360 spe_release_register(f
, stencil_pass
);
364 spe_andc(f
, stencil_pass
, mask
, stencil_mask
);
366 spe_and(f
, stencil_pass
, mask
, stencil_mask
);
370 if (depth_complement
) {
371 spe_andc(f
, depth_pass
, stencil_pass
, depth_mask
);
373 spe_and(f
, depth_pass
, stencil_pass
, depth_mask
);
377 /* Conditionally emit code to update the stencil value under various
378 * conditions. Note that there is no need to generate code under the
379 * following circumstances:
381 * - Stencil write mask is zero.
382 * - For stencil-fail if the stencil test is ALWAYS
383 * - For depth-fail if the stencil test is NEVER
384 * - For depth-pass if the stencil test is NEVER
385 * - Any of the 3 conditions if the operation is KEEP
387 if (dsa
->stencil
[face
].write_mask
!= 0) {
388 if ((dsa
->stencil
[face
].func
!= PIPE_FUNC_ALWAYS
)
389 && (dsa
->stencil
[face
].fail_op
!= PIPE_STENCIL_OP_KEEP
)) {
391 spe_and(f
, stencil_fail
, mask
, stencil_mask
);
393 spe_andc(f
, stencil_fail
, mask
, stencil_mask
);
396 emit_stencil_op(f
, face_stencil
, stencil_src
, stencil_fail
,
397 dsa
->stencil
[face
].fail_op
,
398 dsa
->stencil
[face
].ref_value
);
400 stencil_src
= face_stencil
;
403 if ((dsa
->stencil
[face
].func
!= PIPE_FUNC_NEVER
)
404 && (dsa
->stencil
[face
].zfail_op
!= PIPE_STENCIL_OP_KEEP
)) {
405 if (depth_complement
) {
406 spe_and(f
, depth_fail
, stencil_pass
, depth_mask
);
408 spe_andc(f
, depth_fail
, stencil_pass
, depth_mask
);
411 emit_stencil_op(f
, face_stencil
, stencil_src
, depth_fail
,
412 dsa
->stencil
[face
].zfail_op
,
413 dsa
->stencil
[face
].ref_value
);
414 stencil_src
= face_stencil
;
417 if ((dsa
->stencil
[face
].func
!= PIPE_FUNC_NEVER
)
418 && (dsa
->stencil
[face
].zpass_op
!= PIPE_STENCIL_OP_KEEP
)) {
419 emit_stencil_op(f
, face_stencil
, stencil_src
, depth_pass
,
420 dsa
->stencil
[face
].zpass_op
,
421 dsa
->stencil
[face
].ref_value
);
422 stencil_src
= face_stencil
;
426 spe_release_register(f
, stencil_fail
);
427 spe_release_register(f
, depth_fail
);
428 spe_release_register(f
, stencil_mask
);
429 if (stencil_pass
!= mask
) {
430 spe_release_register(f
, stencil_pass
);
433 /* If all of the stencil operations were KEEP or the stencil write mask was
434 * zero, "stencil_src" will still be set to "stencil". In this case
435 * release the "face_stencil" register. Otherwise apply the stencil write
436 * mask to select bits from the calculated stencil value and the previous
439 if (stencil_src
== stencil
) {
440 spe_release_register(f
, face_stencil
);
441 } else if (dsa
->stencil
[face
].write_mask
!= 0x0ff) {
442 int tmp
= spe_allocate_available_register(f
);
444 spe_il(f
, tmp
, dsa
->stencil
[face
].write_mask
);
445 spe_selb(f
, stencil_src
, stencil
, stencil_src
, tmp
);
447 spe_release_register(f
, tmp
);
455 cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state
*cdsa
)
457 struct pipe_depth_stencil_alpha_state
*const dsa
= &cdsa
->base
;
458 struct spe_function
*const f
= &cdsa
->code
;
460 /* This code generates a maximum of 6 (alpha test) + 3 (depth test)
461 * + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions. Round
462 * up to 64 to make it a happy power-of-two.
464 spe_init_func(f
, 4 * 64);
467 /* Allocate registers for the function's input parameters. Cleverly (and
468 * clever code is usually dangerous, but I couldn't resist) the generated
469 * function returns a structure. Returned structures start with register
470 * 3, and the structure fields are ordered to match up exactly with the
473 int mask
= spe_allocate_register(f
, 3);
474 int depth
= spe_allocate_register(f
, 4);
475 int stencil
= spe_allocate_register(f
, 5);
476 int zvals
= spe_allocate_register(f
, 6);
477 int frag_a
= spe_allocate_register(f
, 7);
478 int facing
= spe_allocate_register(f
, 8);
480 int depth_mask
= spe_allocate_available_register(f
);
482 boolean depth_complement
;
485 emit_alpha_test(dsa
, f
, mask
, frag_a
);
487 depth_complement
= emit_depth_test(dsa
, f
, depth_mask
, depth
, zvals
);
489 if (dsa
->stencil
[0].enabled
) {
490 const int front_depth_pass
= spe_allocate_available_register(f
);
491 int front_stencil
= emit_stencil_test(dsa
, 0, f
, mask
,
492 depth_mask
, depth_complement
,
493 stencil
, front_depth_pass
);
495 if (dsa
->stencil
[1].enabled
) {
496 const int back_depth_pass
= spe_allocate_available_register(f
);
497 int back_stencil
= emit_stencil_test(dsa
, 1, f
, mask
,
498 depth_mask
, depth_complement
,
499 stencil
, back_depth_pass
);
501 /* If the front facing stencil value and the back facing stencil
502 * value are stored in the same register, there is no need to select
503 * a value based on the facing. This can happen if the stencil value
504 * was not modified due to the write masks being zero, the stencil
505 * operations being KEEP, etc.
507 if (front_stencil
!= back_stencil
) {
508 spe_selb(f
, stencil
, back_stencil
, front_stencil
, facing
);
511 if (back_stencil
!= stencil
) {
512 spe_release_register(f
, back_stencil
);
515 if (front_stencil
!= stencil
) {
516 spe_release_register(f
, front_stencil
);
519 spe_selb(f
, mask
, back_depth_pass
, front_depth_pass
, facing
);
521 spe_release_register(f
, back_depth_pass
);
523 if (front_stencil
!= stencil
) {
524 spe_or(f
, stencil
, front_stencil
, front_stencil
);
525 spe_release_register(f
, front_stencil
);
527 spe_or(f
, mask
, front_depth_pass
, front_depth_pass
);
530 spe_release_register(f
, front_depth_pass
);
531 } else if (dsa
->depth
.enabled
) {
532 if (depth_complement
) {
533 spe_andc(f
, mask
, mask
, depth_mask
);
535 spe_and(f
, mask
, mask
, depth_mask
);
539 if (dsa
->depth
.writemask
) {
540 spe_selb(f
, depth
, depth
, zvals
, mask
);
548 const uint32_t *p
= f
->store
;
551 printf("# alpha (%sabled)\n",
552 (dsa
->alpha
.enabled
) ? "en" : "dis");
553 printf("# func: %u\n", dsa
->alpha
.func
);
554 printf("# ref: %.2f\n", dsa
->alpha
.ref
);
556 printf("# depth (%sabled)\n",
557 (dsa
->depth
.enabled
) ? "en" : "dis");
558 printf("# func: %u\n", dsa
->depth
.func
);
560 for (i
= 0; i
< 2; i
++) {
561 printf("# %s stencil (%sabled)\n",
562 (i
== 0) ? "front" : "back",
563 (dsa
->stencil
[i
].enabled
) ? "en" : "dis");
565 printf("# func: %u\n", dsa
->stencil
[i
].func
);
566 printf("# op (sf, zf, zp): %u %u %u\n",
567 dsa
->stencil
[i
].fail_op
,
568 dsa
->stencil
[i
].zfail_op
,
569 dsa
->stencil
[i
].zpass_op
);
570 printf("# ref value / value mask / write mask: %02x %02x %02x\n",
571 dsa
->stencil
[i
].ref_value
,
572 dsa
->stencil
[i
].value_mask
,
573 dsa
->stencil
[i
].write_mask
);
577 for (/* empty */; p
< f
->csr
; p
++) {
578 printf("\t.long\t0x%04x\n", *p
);
587 * \note Emits a maximum of 3 instructions
590 emit_alpha_factor_calculation(struct spe_function
*f
,
592 int src_alpha
, int dst_alpha
, int const_alpha
)
599 case PIPE_BLENDFACTOR_ONE
:
603 case PIPE_BLENDFACTOR_SRC_ALPHA
:
604 factor_reg
= spe_allocate_available_register(f
);
606 spe_or(f
, factor_reg
, src_alpha
, src_alpha
);
609 case PIPE_BLENDFACTOR_DST_ALPHA
:
610 factor_reg
= dst_alpha
;
613 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE
:
617 case PIPE_BLENDFACTOR_INV_CONST_ALPHA
:
618 factor_reg
= spe_allocate_available_register(f
);
620 tmp
= spe_allocate_available_register(f
);
622 spe_cuflt(f
, tmp
, tmp
, 0);
623 spe_fs(f
, factor_reg
, tmp
, const_alpha
);
624 spe_release_register(f
, tmp
);
627 case PIPE_BLENDFACTOR_CONST_ALPHA
:
628 factor_reg
= const_alpha
;
631 case PIPE_BLENDFACTOR_ZERO
:
635 case PIPE_BLENDFACTOR_INV_SRC_ALPHA
:
636 tmp
= spe_allocate_available_register(f
);
637 factor_reg
= spe_allocate_available_register(f
);
640 spe_cuflt(f
, tmp
, tmp
, 0);
641 spe_fs(f
, factor_reg
, tmp
, src_alpha
);
643 spe_release_register(f
, tmp
);
646 case PIPE_BLENDFACTOR_INV_DST_ALPHA
:
647 tmp
= spe_allocate_available_register(f
);
648 factor_reg
= spe_allocate_available_register(f
);
651 spe_cuflt(f
, tmp
, tmp
, 0);
652 spe_fs(f
, factor_reg
, tmp
, dst_alpha
);
654 spe_release_register(f
, tmp
);
657 case PIPE_BLENDFACTOR_SRC1_ALPHA
:
658 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA
:
670 * \note Emits a maximum of 6 instructions
673 emit_color_factor_calculation(struct spe_function
*f
,
674 unsigned sF
, unsigned mask
,
677 const int *const_color
,
690 case PIPE_BLENDFACTOR_ONE
:
693 case PIPE_BLENDFACTOR_SRC_COLOR
:
694 for (i
= 0; i
< 3; ++i
) {
695 if ((mask
& (1U << i
)) != 0) {
696 factor
[i
] = spe_allocate_available_register(f
);
697 spe_or(f
, factor
[i
], src
[i
], src
[i
]);
702 case PIPE_BLENDFACTOR_SRC_ALPHA
:
703 factor
[0] = spe_allocate_available_register(f
);
704 factor
[1] = factor
[0];
705 factor
[2] = factor
[0];
707 spe_or(f
, factor
[0], src
[3], src
[3]);
710 case PIPE_BLENDFACTOR_DST_ALPHA
:
716 case PIPE_BLENDFACTOR_DST_COLOR
:
722 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE
:
723 tmp
= spe_allocate_available_register(f
);
724 factor
[0] = spe_allocate_available_register(f
);
725 factor
[1] = factor
[0];
726 factor
[2] = factor
[0];
728 /* Alpha saturate means min(As, 1-Ad).
731 spe_cuflt(f
, tmp
, tmp
, 0);
732 spe_fs(f
, tmp
, tmp
, dst
[3]);
733 spe_fcgt(f
, factor
[0], tmp
, src
[3]);
734 spe_selb(f
, factor
[0], src
[3], tmp
, factor
[0]);
736 spe_release_register(f
, tmp
);
739 case PIPE_BLENDFACTOR_INV_CONST_COLOR
:
740 tmp
= spe_allocate_available_register(f
);
742 spe_cuflt(f
, tmp
, tmp
, 0);
744 for (i
= 0; i
< 3; i
++) {
745 factor
[i
] = spe_allocate_available_register(f
);
747 spe_fs(f
, factor
[i
], tmp
, const_color
[i
]);
749 spe_release_register(f
, tmp
);
752 case PIPE_BLENDFACTOR_CONST_COLOR
:
753 for (i
= 0; i
< 3; i
++) {
754 factor
[i
] = const_color
[i
];
758 case PIPE_BLENDFACTOR_INV_CONST_ALPHA
:
759 factor
[0] = spe_allocate_available_register(f
);
760 factor
[1] = factor
[0];
761 factor
[2] = factor
[0];
763 tmp
= spe_allocate_available_register(f
);
765 spe_cuflt(f
, tmp
, tmp
, 0);
766 spe_fs(f
, factor
[0], tmp
, const_color
[3]);
767 spe_release_register(f
, tmp
);
770 case PIPE_BLENDFACTOR_CONST_ALPHA
:
771 factor
[0] = const_color
[3];
772 factor
[1] = factor
[0];
773 factor
[2] = factor
[0];
776 case PIPE_BLENDFACTOR_ZERO
:
779 case PIPE_BLENDFACTOR_INV_SRC_COLOR
:
780 tmp
= spe_allocate_available_register(f
);
783 spe_cuflt(f
, tmp
, tmp
, 0);
785 for (i
= 0; i
< 3; ++i
) {
786 if ((mask
& (1U << i
)) != 0) {
787 factor
[i
] = spe_allocate_available_register(f
);
788 spe_fs(f
, factor
[i
], tmp
, src
[i
]);
792 spe_release_register(f
, tmp
);
795 case PIPE_BLENDFACTOR_INV_SRC_ALPHA
:
796 tmp
= spe_allocate_available_register(f
);
797 factor
[0] = spe_allocate_available_register(f
);
798 factor
[1] = factor
[0];
799 factor
[2] = factor
[0];
802 spe_cuflt(f
, tmp
, tmp
, 0);
803 spe_fs(f
, factor
[0], tmp
, src
[3]);
805 spe_release_register(f
, tmp
);
808 case PIPE_BLENDFACTOR_INV_DST_ALPHA
:
809 tmp
= spe_allocate_available_register(f
);
810 factor
[0] = spe_allocate_available_register(f
);
811 factor
[1] = factor
[0];
812 factor
[2] = factor
[0];
815 spe_cuflt(f
, tmp
, tmp
, 0);
816 spe_fs(f
, factor
[0], tmp
, dst
[3]);
818 spe_release_register(f
, tmp
);
821 case PIPE_BLENDFACTOR_INV_DST_COLOR
:
822 tmp
= spe_allocate_available_register(f
);
825 spe_cuflt(f
, tmp
, tmp
, 0);
827 for (i
= 0; i
< 3; ++i
) {
828 if ((mask
& (1U << i
)) != 0) {
829 factor
[i
] = spe_allocate_available_register(f
);
830 spe_fs(f
, factor
[i
], tmp
, dst
[i
]);
834 spe_release_register(f
, tmp
);
837 case PIPE_BLENDFACTOR_SRC1_COLOR
:
838 case PIPE_BLENDFACTOR_SRC1_ALPHA
:
839 case PIPE_BLENDFACTOR_INV_SRC1_COLOR
:
840 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA
:
848 emit_blend_calculation(struct spe_function
*f
,
849 unsigned func
, unsigned sF
, unsigned dF
,
850 int src
, int src_factor
, int dst
, int dst_factor
)
852 int tmp
= spe_allocate_available_register(f
);
856 if (sF
== PIPE_BLENDFACTOR_ONE
) {
857 if (dF
== PIPE_BLENDFACTOR_ZERO
) {
859 } else if (dF
== PIPE_BLENDFACTOR_ONE
) {
860 spe_fa(f
, src
, src
, dst
);
862 } else if (sF
== PIPE_BLENDFACTOR_ZERO
) {
863 if (dF
== PIPE_BLENDFACTOR_ZERO
) {
865 } else if (dF
== PIPE_BLENDFACTOR_ONE
) {
866 spe_or(f
, src
, dst
, dst
);
868 spe_fm(f
, src
, dst
, dst_factor
);
870 } else if (dF
== PIPE_BLENDFACTOR_ZERO
) {
871 spe_fm(f
, src
, src
, src_factor
);
873 spe_fm(f
, tmp
, dst
, dst_factor
);
874 spe_fma(f
, src
, src
, src_factor
, tmp
);
878 case PIPE_BLEND_SUBTRACT
:
879 if (sF
== PIPE_BLENDFACTOR_ONE
) {
880 if (dF
== PIPE_BLENDFACTOR_ZERO
) {
882 } else if (dF
== PIPE_BLENDFACTOR_ONE
) {
883 spe_fs(f
, src
, src
, dst
);
885 } else if (sF
== PIPE_BLENDFACTOR_ZERO
) {
886 if (dF
== PIPE_BLENDFACTOR_ZERO
) {
888 } else if (dF
== PIPE_BLENDFACTOR_ONE
) {
890 spe_fs(f
, src
, tmp
, dst
);
892 spe_fm(f
, src
, dst
, dst_factor
);
894 } else if (dF
== PIPE_BLENDFACTOR_ZERO
) {
895 spe_fm(f
, src
, src
, src_factor
);
897 spe_fm(f
, tmp
, dst
, dst_factor
);
898 spe_fms(f
, src
, src
, src_factor
, tmp
);
902 case PIPE_BLEND_REVERSE_SUBTRACT
:
903 if (sF
== PIPE_BLENDFACTOR_ONE
) {
904 if (dF
== PIPE_BLENDFACTOR_ZERO
) {
906 spe_fs(f
, src
, tmp
, src
);
907 } else if (dF
== PIPE_BLENDFACTOR_ONE
) {
908 spe_fs(f
, src
, dst
, src
);
910 } else if (sF
== PIPE_BLENDFACTOR_ZERO
) {
911 if (dF
== PIPE_BLENDFACTOR_ZERO
) {
913 } else if (dF
== PIPE_BLENDFACTOR_ONE
) {
914 spe_or(f
, src
, dst
, dst
);
916 spe_fm(f
, src
, dst
, dst_factor
);
918 } else if (dF
== PIPE_BLENDFACTOR_ZERO
) {
919 spe_fm(f
, src
, src
, src_factor
);
921 spe_fm(f
, tmp
, src
, src_factor
);
922 spe_fms(f
, src
, src
, dst_factor
, tmp
);
927 spe_cgt(f
, tmp
, src
, dst
);
928 spe_selb(f
, src
, src
, dst
, tmp
);
932 spe_cgt(f
, tmp
, src
, dst
);
933 spe_selb(f
, src
, dst
, src
, tmp
);
940 spe_release_register(f
, tmp
);
945 * Generate code to perform alpha blending on the SPE
948 cell_generate_alpha_blend(struct cell_blend_state
*cb
)
950 struct pipe_blend_state
*const b
= &cb
->base
;
951 struct spe_function
*const f
= &cb
->code
;
953 /* This code generates a maximum of 3 (source alpha factor)
954 * + 3 (destination alpha factor) + (3 * 6) (source color factor)
955 * + (3 * 6) (destination color factor) + (4 * 2) (blend equation)
956 * + 4 (fragment mask) + 1 (return) = 55 instructions. Round up to 64 to
957 * make it a happy power-of-two.
959 spe_init_func(f
, 4 * 64);
962 const int frag
[4] = {
963 spe_allocate_register(f
, 3),
964 spe_allocate_register(f
, 4),
965 spe_allocate_register(f
, 5),
966 spe_allocate_register(f
, 6),
968 const int pixel
[4] = {
969 spe_allocate_register(f
, 7),
970 spe_allocate_register(f
, 8),
971 spe_allocate_register(f
, 9),
972 spe_allocate_register(f
, 10),
974 const int const_color
[4] = {
975 spe_allocate_register(f
, 11),
976 spe_allocate_register(f
, 12),
977 spe_allocate_register(f
, 13),
978 spe_allocate_register(f
, 14),
988 /* Does the selected blend mode make use of the source / destination
989 * color (RGB) blend factors?
991 boolean need_color_factor
= b
->blend_enable
992 && (b
->rgb_func
!= PIPE_BLEND_MIN
)
993 && (b
->rgb_func
!= PIPE_BLEND_MAX
);
995 /* Does the selected blend mode make use of the source / destination
996 * alpha blend factors?
998 boolean need_alpha_factor
= b
->blend_enable
999 && (b
->alpha_func
!= PIPE_BLEND_MIN
)
1000 && (b
->alpha_func
!= PIPE_BLEND_MAX
);
1003 if (b
->blend_enable
) {
1004 sF
[0] = b
->rgb_src_factor
;
1007 switch (b
->alpha_src_factor
& 0x0f) {
1008 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE
:
1009 sF
[3] = PIPE_BLENDFACTOR_ONE
;
1011 case PIPE_BLENDFACTOR_SRC_COLOR
:
1012 case PIPE_BLENDFACTOR_DST_COLOR
:
1013 case PIPE_BLENDFACTOR_CONST_COLOR
:
1014 case PIPE_BLENDFACTOR_SRC1_COLOR
:
1015 sF
[3] = b
->alpha_src_factor
+ 1;
1018 sF
[3] = b
->alpha_src_factor
;
1021 dF
[0] = b
->rgb_dst_factor
;
1024 switch (b
->alpha_dst_factor
& 0x0f) {
1025 case PIPE_BLENDFACTOR_SRC_COLOR
:
1026 case PIPE_BLENDFACTOR_DST_COLOR
:
1027 case PIPE_BLENDFACTOR_CONST_COLOR
:
1028 case PIPE_BLENDFACTOR_SRC1_COLOR
:
1029 dF
[3] = b
->alpha_dst_factor
+ 1;
1032 dF
[3] = b
->alpha_dst_factor
;
1035 func
[0] = b
->rgb_func
;
1038 func
[3] = b
->alpha_func
;
1040 sF
[0] = PIPE_BLENDFACTOR_ONE
;
1041 sF
[1] = PIPE_BLENDFACTOR_ONE
;
1042 sF
[2] = PIPE_BLENDFACTOR_ONE
;
1043 sF
[3] = PIPE_BLENDFACTOR_ONE
;
1044 dF
[0] = PIPE_BLENDFACTOR_ZERO
;
1045 dF
[1] = PIPE_BLENDFACTOR_ZERO
;
1046 dF
[2] = PIPE_BLENDFACTOR_ZERO
;
1047 dF
[3] = PIPE_BLENDFACTOR_ZERO
;
1049 func
[0] = PIPE_BLEND_ADD
;
1050 func
[1] = PIPE_BLEND_ADD
;
1051 func
[2] = PIPE_BLEND_ADD
;
1052 func
[3] = PIPE_BLEND_ADD
;
1056 /* If alpha writing is enabled and the alpha blend mode requires use of
1057 * the alpha factor, calculate the alpha factor.
1059 if (((b
->colormask
& 8) != 0) && need_alpha_factor
) {
1060 src_factor
[3] = emit_alpha_factor_calculation(f
, sF
[3], const_color
[3],
1063 /* If the alpha destination blend factor is the same as the alpha source
1064 * blend factor, re-use the previously calculated value.
1066 dst_factor
[3] = (dF
[3] == sF
[3])
1068 : emit_alpha_factor_calculation(f
, dF
[3], const_color
[3],
1073 if (sF
[0] == sF
[3]) {
1074 src_factor
[0] = src_factor
[3];
1075 src_factor
[1] = src_factor
[3];
1076 src_factor
[2] = src_factor
[3];
1077 } else if (sF
[0] == dF
[3]) {
1078 src_factor
[0] = dst_factor
[3];
1079 src_factor
[1] = dst_factor
[3];
1080 src_factor
[2] = dst_factor
[3];
1081 } else if (need_color_factor
) {
1082 emit_color_factor_calculation(f
,
1085 frag
, pixel
, const_color
, src_factor
);
1089 if (dF
[0] == sF
[3]) {
1090 dst_factor
[0] = src_factor
[3];
1091 dst_factor
[1] = src_factor
[3];
1092 dst_factor
[2] = src_factor
[3];
1093 } else if (dF
[0] == dF
[3]) {
1094 dst_factor
[0] = dst_factor
[3];
1095 dst_factor
[1] = dst_factor
[3];
1096 dst_factor
[2] = dst_factor
[3];
1097 } else if (dF
[0] == sF
[0]) {
1098 dst_factor
[0] = src_factor
[0];
1099 dst_factor
[1] = src_factor
[1];
1100 dst_factor
[2] = src_factor
[2];
1101 } else if (need_color_factor
) {
1102 emit_color_factor_calculation(f
,
1105 frag
, pixel
, const_color
, dst_factor
);
1110 for (i
= 0; i
< 4; ++i
) {
1111 if ((b
->colormask
& (1U << i
)) != 0) {
1112 emit_blend_calculation(f
,
1113 func
[i
], sF
[i
], dF
[i
],
1114 frag
[i
], src_factor
[i
],
1115 pixel
[i
], dst_factor
[i
]);
1123 const uint32_t *p
= f
->store
;
1125 printf("# %u instructions\n", f
->csr
- f
->store
);
1126 printf("# blend (%sabled)\n",
1127 (cb
->base
.blend_enable
) ? "en" : "dis");
1128 printf("# RGB func / sf / df: %u %u %u\n",
1130 cb
->base
.rgb_src_factor
,
1131 cb
->base
.rgb_dst_factor
);
1132 printf("# ALP func / sf / df: %u %u %u\n",
1133 cb
->base
.alpha_func
,
1134 cb
->base
.alpha_src_factor
,
1135 cb
->base
.alpha_dst_factor
);
1137 printf("\t.text\n");
1138 for (/* empty */; p
< f
->csr
; p
++) {
1139 printf("\t.long\t0x%04x\n", *p
);
1147 int PC_OFFSET(const struct spe_function
*f
, const void *d
)
1149 const intptr_t pc
= (intptr_t) f
->csr
;
1150 const intptr_t ea
= ~0x0f & (intptr_t) d
;
1152 return (ea
- pc
) >> 2;
1157 * Generate code to perform color conversion and logic op
1160 * The code generated by this function should also perform dithering.
1163 * The code generated by this function should also perform color-write
1167 * Only two framebuffer formats are supported at this time.
1170 cell_generate_logic_op(struct spe_function
*f
, struct pipe_blend_state
*blend
,
1171 struct pipe_surface
*surf
)
1173 const unsigned logic_op
= (blend
->logicop_enable
)
1174 ? blend
->logicop_func
: PIPE_LOGICOP_COPY
;
1176 /* This code generates a maximum of 37 instructions. An additional 32
1177 * bytes (equiv. to 8 instructions) are needed for data storage. Round up
1178 * to 64 to make it a happy power-of-two.
1180 spe_init_func(f
, 4 * 64);
1183 /* Pixel colors in framebuffer format in AoS layout.
1185 const int pixel
[4] = {
1186 spe_allocate_register(f
, 3),
1187 spe_allocate_register(f
, 4),
1188 spe_allocate_register(f
, 5),
1189 spe_allocate_register(f
, 6),
1192 /* Fragment colors stored as floats in SoA layout.
1194 const int frag
[4] = {
1195 spe_allocate_register(f
, 7),
1196 spe_allocate_register(f
, 8),
1197 spe_allocate_register(f
, 9),
1198 spe_allocate_register(f
, 10),
1201 const int mask
= spe_allocate_register(f
, 11);
1204 /* Short-circuit the noop and invert cases.
1206 if ((logic_op
== PIPE_LOGICOP_NOOP
) || (blend
->colormask
== 0)) {
1209 } else if (logic_op
== PIPE_LOGICOP_INVERT
) {
1210 spe_nor(f
, pixel
[0], pixel
[0], pixel
[0]);
1211 spe_nor(f
, pixel
[1], pixel
[1], pixel
[1]);
1212 spe_nor(f
, pixel
[2], pixel
[2], pixel
[2]);
1213 spe_nor(f
, pixel
[3], pixel
[3], pixel
[3]);
1219 const int tmp
[4] = {
1220 spe_allocate_available_register(f
),
1221 spe_allocate_available_register(f
),
1222 spe_allocate_available_register(f
),
1223 spe_allocate_available_register(f
),
1226 const int shuf_xpose_hi
= spe_allocate_available_register(f
);
1227 const int shuf_xpose_lo
= spe_allocate_available_register(f
);
1228 const int shuf_color
= spe_allocate_available_register(f
);
1231 /* Pointer to the beginning of the function's private data area.
1233 uint32_t *const data
= ((uint32_t *) f
->store
) + (64 - 8);
1236 /* Convert fragment colors to framebuffer format in AoS layout.
1238 switch (surf
->format
) {
1239 case PIPE_FORMAT_A8R8G8B8_UNORM
:
1240 data
[0] = 0x00010203;
1241 data
[1] = 0x10111213;
1242 data
[2] = 0x04050607;
1243 data
[3] = 0x14151617;
1244 data
[4] = 0x0c000408;
1245 data
[5] = 0x80808080;
1246 data
[6] = 0x80808080;
1247 data
[7] = 0x80808080;
1249 case PIPE_FORMAT_B8G8R8A8_UNORM
:
1250 data
[0] = 0x03020100;
1251 data
[1] = 0x13121110;
1252 data
[2] = 0x07060504;
1253 data
[3] = 0x17161514;
1254 data
[4] = 0x0804000c;
1255 data
[5] = 0x80808080;
1256 data
[6] = 0x80808080;
1257 data
[7] = 0x80808080;
1260 fprintf(stderr
, "CELL: Bad pixel format in cell_generate_logic_op()");
1264 spe_ilh(f
, tmp
[0], 0x0808);
1265 spe_lqr(f
, shuf_xpose_hi
, PC_OFFSET(f
, data
+0));
1266 spe_lqr(f
, shuf_color
, PC_OFFSET(f
, data
+4));
1267 spe_a(f
, shuf_xpose_lo
, shuf_xpose_hi
, tmp
[0]);
1269 spe_shufb(f
, tmp
[0], frag
[0], frag
[2], shuf_xpose_hi
);
1270 spe_shufb(f
, tmp
[1], frag
[0], frag
[2], shuf_xpose_lo
);
1271 spe_shufb(f
, tmp
[2], frag
[1], frag
[3], shuf_xpose_hi
);
1272 spe_shufb(f
, tmp
[3], frag
[1], frag
[3], shuf_xpose_lo
);
1274 spe_shufb(f
, frag
[0], tmp
[0], tmp
[2], shuf_xpose_hi
);
1275 spe_shufb(f
, frag
[1], tmp
[0], tmp
[2], shuf_xpose_lo
);
1276 spe_shufb(f
, frag
[2], tmp
[1], tmp
[3], shuf_xpose_hi
);
1277 spe_shufb(f
, frag
[3], tmp
[1], tmp
[3], shuf_xpose_lo
);
1279 spe_cfltu(f
, frag
[0], frag
[0], 32);
1280 spe_cfltu(f
, frag
[1], frag
[1], 32);
1281 spe_cfltu(f
, frag
[2], frag
[2], 32);
1282 spe_cfltu(f
, frag
[3], frag
[3], 32);
1284 spe_shufb(f
, frag
[0], frag
[0], pixel
[0], shuf_color
);
1285 spe_shufb(f
, frag
[1], frag
[1], pixel
[1], shuf_color
);
1286 spe_shufb(f
, frag
[2], frag
[2], pixel
[2], shuf_color
);
1287 spe_shufb(f
, frag
[3], frag
[3], pixel
[3], shuf_color
);
1290 /* If logic op is enabled, perform the requested logical operation on the
1291 * converted fragment colors and the pixel colors.
1294 case PIPE_LOGICOP_CLEAR
:
1295 spe_il(f
, frag
[0], 0);
1296 spe_il(f
, frag
[1], 0);
1297 spe_il(f
, frag
[2], 0);
1298 spe_il(f
, frag
[3], 0);
1300 case PIPE_LOGICOP_NOR
:
1301 spe_nor(f
, frag
[0], frag
[0], pixel
[0]);
1302 spe_nor(f
, frag
[1], frag
[1], pixel
[1]);
1303 spe_nor(f
, frag
[2], frag
[2], pixel
[2]);
1304 spe_nor(f
, frag
[3], frag
[3], pixel
[3]);
1306 case PIPE_LOGICOP_AND_INVERTED
:
1307 spe_andc(f
, frag
[0], pixel
[0], frag
[0]);
1308 spe_andc(f
, frag
[1], pixel
[1], frag
[1]);
1309 spe_andc(f
, frag
[2], pixel
[2], frag
[2]);
1310 spe_andc(f
, frag
[3], pixel
[3], frag
[3]);
1312 case PIPE_LOGICOP_COPY_INVERTED
:
1313 spe_nor(f
, frag
[0], frag
[0], frag
[0]);
1314 spe_nor(f
, frag
[1], frag
[1], frag
[1]);
1315 spe_nor(f
, frag
[2], frag
[2], frag
[2]);
1316 spe_nor(f
, frag
[3], frag
[3], frag
[3]);
1318 case PIPE_LOGICOP_AND_REVERSE
:
1319 spe_andc(f
, frag
[0], frag
[0], pixel
[0]);
1320 spe_andc(f
, frag
[1], frag
[1], pixel
[1]);
1321 spe_andc(f
, frag
[2], frag
[2], pixel
[2]);
1322 spe_andc(f
, frag
[3], frag
[3], pixel
[3]);
1324 case PIPE_LOGICOP_XOR
:
1325 spe_xor(f
, frag
[0], frag
[0], pixel
[0]);
1326 spe_xor(f
, frag
[1], frag
[1], pixel
[1]);
1327 spe_xor(f
, frag
[2], frag
[2], pixel
[2]);
1328 spe_xor(f
, frag
[3], frag
[3], pixel
[3]);
1330 case PIPE_LOGICOP_NAND
:
1331 spe_nand(f
, frag
[0], frag
[0], pixel
[0]);
1332 spe_nand(f
, frag
[1], frag
[1], pixel
[1]);
1333 spe_nand(f
, frag
[2], frag
[2], pixel
[2]);
1334 spe_nand(f
, frag
[3], frag
[3], pixel
[3]);
1336 case PIPE_LOGICOP_AND
:
1337 spe_and(f
, frag
[0], frag
[0], pixel
[0]);
1338 spe_and(f
, frag
[1], frag
[1], pixel
[1]);
1339 spe_and(f
, frag
[2], frag
[2], pixel
[2]);
1340 spe_and(f
, frag
[3], frag
[3], pixel
[3]);
1342 case PIPE_LOGICOP_EQUIV
:
1343 spe_eqv(f
, frag
[0], frag
[0], pixel
[0]);
1344 spe_eqv(f
, frag
[1], frag
[1], pixel
[1]);
1345 spe_eqv(f
, frag
[2], frag
[2], pixel
[2]);
1346 spe_eqv(f
, frag
[3], frag
[3], pixel
[3]);
1348 case PIPE_LOGICOP_OR_INVERTED
:
1349 spe_orc(f
, frag
[0], pixel
[0], frag
[0]);
1350 spe_orc(f
, frag
[1], pixel
[1], frag
[1]);
1351 spe_orc(f
, frag
[2], pixel
[2], frag
[2]);
1352 spe_orc(f
, frag
[3], pixel
[3], frag
[3]);
1354 case PIPE_LOGICOP_COPY
:
1356 case PIPE_LOGICOP_OR_REVERSE
:
1357 spe_orc(f
, frag
[0], frag
[0], pixel
[0]);
1358 spe_orc(f
, frag
[1], frag
[1], pixel
[1]);
1359 spe_orc(f
, frag
[2], frag
[2], pixel
[2]);
1360 spe_orc(f
, frag
[3], frag
[3], pixel
[3]);
1362 case PIPE_LOGICOP_OR
:
1363 spe_or(f
, frag
[0], frag
[0], pixel
[0]);
1364 spe_or(f
, frag
[1], frag
[1], pixel
[1]);
1365 spe_or(f
, frag
[2], frag
[2], pixel
[2]);
1366 spe_or(f
, frag
[3], frag
[3], pixel
[3]);
1368 case PIPE_LOGICOP_SET
:
1369 spe_il(f
, frag
[0], ~0);
1370 spe_il(f
, frag
[1], ~0);
1371 spe_il(f
, frag
[2], ~0);
1372 spe_il(f
, frag
[3], ~0);
1375 /* These two cases are short-circuited above.
1377 case PIPE_LOGICOP_INVERT
:
1378 case PIPE_LOGICOP_NOOP
:
1384 /* Apply fragment mask.
1386 spe_ilh(f
, tmp
[0], 0x0000);
1387 spe_ilh(f
, tmp
[1], 0x0404);
1388 spe_ilh(f
, tmp
[2], 0x0808);
1389 spe_ilh(f
, tmp
[3], 0x0c0c);
1391 spe_shufb(f
, tmp
[0], mask
, mask
, tmp
[0]);
1392 spe_shufb(f
, tmp
[1], mask
, mask
, tmp
[1]);
1393 spe_shufb(f
, tmp
[2], mask
, mask
, tmp
[2]);
1394 spe_shufb(f
, tmp
[3], mask
, mask
, tmp
[3]);
1396 spe_selb(f
, pixel
[0], pixel
[0], frag
[0], tmp
[0]);
1397 spe_selb(f
, pixel
[1], pixel
[1], frag
[1], tmp
[1]);
1398 spe_selb(f
, pixel
[2], pixel
[2], frag
[2], tmp
[2]);
1399 spe_selb(f
, pixel
[3], pixel
[3], frag
[3], tmp
[3]);
1405 const uint32_t *p
= f
->store
;
1408 printf("# %u instructions\n", f
->csr
- f
->store
);
1410 printf("\t.text\n");
1411 for (i
= 0; i
< 64; i
++) {
1412 printf("\t.long\t0x%04x\n", p
[i
]);