1 /**************************************************************************
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
31 * Generate SPU per-fragment code (actually per-quad code).
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "rtasm/rtasm_ppc_spe.h"
39 #include "cell_context.h"
40 #include "cell_gen_fragment.h"
44 /** Do extra optimizations? */
45 #define OPTIMIZATIONS 1
49 * Generate SPE code to perform Z/depth testing.
51 * \param dsa Gallium depth/stencil/alpha state to gen code for
52 * \param f SPE function to append instruction onto.
53 * \param mask_reg register containing quad/pixel "alive" mask (in/out)
54 * \param ifragZ_reg register containing integer fragment Z values (in)
55 * \param ifbZ_reg register containing integer frame buffer Z values (in/out)
56 * \param zmask_reg register containing result of Z test/comparison (out)
59 gen_depth_test(const struct pipe_depth_stencil_alpha_state
*dsa
,
60 struct spe_function
*f
,
61 int mask_reg
, int ifragZ_reg
, int ifbZ_reg
, int zmask_reg
)
63 ASSERT(dsa
->depth
.enabled
);
65 switch (dsa
->depth
.func
) {
67 /* zmask = (ifragZ == ifbZ) */
68 spe_ceq(f
, zmask_reg
, ifragZ_reg
, ifbZ_reg
);
69 /* mask = (mask & zmask) */
70 spe_and(f
, mask_reg
, mask_reg
, zmask_reg
);
73 case PIPE_FUNC_NOTEQUAL
:
74 /* zmask = (ifragZ == ifbZ) */
75 spe_ceq(f
, zmask_reg
, ifragZ_reg
, ifbZ_reg
);
76 /* mask = (mask & ~zmask) */
77 spe_andc(f
, mask_reg
, mask_reg
, zmask_reg
);
80 case PIPE_FUNC_GREATER
:
81 /* zmask = (ifragZ > ifbZ) */
82 spe_cgt(f
, zmask_reg
, ifragZ_reg
, ifbZ_reg
);
83 /* mask = (mask & zmask) */
84 spe_and(f
, mask_reg
, mask_reg
, zmask_reg
);
88 /* zmask = (ifbZ > ifragZ) */
89 spe_cgt(f
, zmask_reg
, ifbZ_reg
, ifragZ_reg
);
90 /* mask = (mask & zmask) */
91 spe_and(f
, mask_reg
, mask_reg
, zmask_reg
);
94 case PIPE_FUNC_LEQUAL
:
95 /* zmask = (ifragZ > ifbZ) */
96 spe_cgt(f
, zmask_reg
, ifragZ_reg
, ifbZ_reg
);
97 /* mask = (mask & ~zmask) */
98 spe_andc(f
, mask_reg
, mask_reg
, zmask_reg
);
101 case PIPE_FUNC_GEQUAL
:
102 /* zmask = (ifbZ > ifragZ) */
103 spe_cgt(f
, zmask_reg
, ifbZ_reg
, ifragZ_reg
);
104 /* mask = (mask & ~zmask) */
105 spe_andc(f
, mask_reg
, mask_reg
, zmask_reg
);
108 case PIPE_FUNC_NEVER
:
109 spe_il(f
, mask_reg
, 0); /* mask = {0,0,0,0} */
110 spe_move(f
, zmask_reg
, mask_reg
); /* zmask = mask */
113 case PIPE_FUNC_ALWAYS
:
115 spe_il(f
, zmask_reg
, ~0); /* zmask = {~0,~0,~0,~0} */
123 if (dsa
->depth
.writemask
) {
125 * If (ztest passed) {
126 * framebufferZ = fragmentZ;
129 * framebufferZ = (ztest_passed ? fragmentZ : framebufferZ);
131 spe_selb(f
, ifbZ_reg
, ifbZ_reg
, ifragZ_reg
, mask_reg
);
137 * Generate SPE code to perform alpha testing.
139 * \param dsa Gallium depth/stencil/alpha state to gen code for
140 * \param f SPE function to append instruction onto.
141 * \param mask_reg register containing quad/pixel "alive" mask (in/out)
142 * \param fragA_reg register containing four fragment alpha values (in)
145 gen_alpha_test(const struct pipe_depth_stencil_alpha_state
*dsa
,
146 struct spe_function
*f
, int mask_reg
, int fragA_reg
)
148 int ref_reg
= spe_allocate_available_register(f
);
149 int amask_reg
= spe_allocate_available_register(f
);
151 ASSERT(dsa
->alpha
.enabled
);
153 if ((dsa
->alpha
.func
!= PIPE_FUNC_NEVER
) &&
154 (dsa
->alpha
.func
!= PIPE_FUNC_ALWAYS
)) {
155 /* load/splat the alpha reference float value */
156 spe_load_float(f
, ref_reg
, dsa
->alpha
.ref
);
159 /* emit code to do the alpha comparison, updating 'mask' */
160 switch (dsa
->alpha
.func
) {
161 case PIPE_FUNC_EQUAL
:
162 /* amask = (fragA == ref) */
163 spe_fceq(f
, amask_reg
, fragA_reg
, ref_reg
);
164 /* mask = (mask & amask) */
165 spe_and(f
, mask_reg
, mask_reg
, amask_reg
);
168 case PIPE_FUNC_NOTEQUAL
:
169 /* amask = (fragA == ref) */
170 spe_fceq(f
, amask_reg
, fragA_reg
, ref_reg
);
171 /* mask = (mask & ~amask) */
172 spe_andc(f
, mask_reg
, mask_reg
, amask_reg
);
175 case PIPE_FUNC_GREATER
:
176 /* amask = (fragA > ref) */
177 spe_fcgt(f
, amask_reg
, fragA_reg
, ref_reg
);
178 /* mask = (mask & amask) */
179 spe_and(f
, mask_reg
, mask_reg
, amask_reg
);
183 /* amask = (ref > fragA) */
184 spe_fcgt(f
, amask_reg
, ref_reg
, fragA_reg
);
185 /* mask = (mask & amask) */
186 spe_and(f
, mask_reg
, mask_reg
, amask_reg
);
189 case PIPE_FUNC_LEQUAL
:
190 /* amask = (fragA > ref) */
191 spe_fcgt(f
, amask_reg
, fragA_reg
, ref_reg
);
192 /* mask = (mask & ~amask) */
193 spe_andc(f
, mask_reg
, mask_reg
, amask_reg
);
196 case PIPE_FUNC_GEQUAL
:
197 /* amask = (ref > fragA) */
198 spe_fcgt(f
, amask_reg
, ref_reg
, fragA_reg
);
199 /* mask = (mask & ~amask) */
200 spe_andc(f
, mask_reg
, mask_reg
, amask_reg
);
203 case PIPE_FUNC_NEVER
:
204 spe_il(f
, mask_reg
, 0); /* mask = [0,0,0,0] */
207 case PIPE_FUNC_ALWAYS
:
208 /* no-op, mask unchanged */
217 /* if mask == {0,0,0,0} we're all done, return */
219 /* re-use amask reg here */
220 int tmp_reg
= amask_reg
;
221 /* tmp[0] = (mask[0] | mask[1] | mask[2] | mask[3]) */
222 spe_orx(f
, tmp_reg
, mask_reg
);
223 /* if tmp[0] == 0 then return from function call */
224 spe_biz(f
, tmp_reg
, SPE_REG_RA
, 0, 0);
228 spe_release_register(f
, ref_reg
);
229 spe_release_register(f
, amask_reg
);
232 /* This is a convenient and oft-used sequence. It chooses
233 * the smaller of each element of reg1 and reg2, and combines them
234 * into the result register, as follows:
236 * The Float Compare Greater Than (fcgt) instruction will put
237 * 1s into compare_reg where reg1 > reg2, and 0s where reg1 <= reg2.
239 * Then the Select Bits (selb) instruction will take bits from
240 * reg1 where compare_reg is 0, and from reg2 where compare_reg is
241 * 1. Ergo, result_reg will have the bits from reg1 where reg1 <= reg2,
242 * and the bits from reg2 where reg1 > reg2, which is exactly the
245 #define FLOAT_VECTOR_MIN(f, result_reg, reg1, reg2) {\
246 int compare_reg = spe_allocate_available_register(f); \
247 spe_fcgt(f, compare_reg, reg1, reg2); \
248 spe_selb(f, result_reg, reg1, reg2, compare_reg); \
249 spe_release_register(f, compare_reg); \
252 /* The FLOAT_VECTOR_MAX sequence is similar to the FLOAT_VECTOR_MIN
253 * sequence above, except that the registers specified when selecting
256 #define FLOAT_VECTOR_MAX(f, result_reg, reg1, reg2) {\
257 int compare_reg = spe_allocate_available_register(f); \
258 spe_fcgt(f, compare_reg, reg1, reg2); \
259 spe_selb(f, result_reg, reg2, reg1, compare_reg); \
260 spe_release_register(f, compare_reg); \
264 * Generate SPE code to implement the given blend mode for a quad of pixels.
265 * \param f SPE function to append instruction onto.
266 * \param fragR_reg register with fragment red values (float) (in/out)
267 * \param fragG_reg register with fragment green values (float) (in/out)
268 * \param fragB_reg register with fragment blue values (float) (in/out)
269 * \param fragA_reg register with fragment alpha values (float) (in/out)
270 * \param fbRGBA_reg register with packed framebuffer colors (integer) (in)
273 gen_blend(const struct pipe_blend_state
*blend
,
274 const struct pipe_blend_color
*blend_color
,
275 struct spe_function
*f
,
276 enum pipe_format color_format
,
277 int fragR_reg
, int fragG_reg
, int fragB_reg
, int fragA_reg
,
280 int term1R_reg
= spe_allocate_available_register(f
);
281 int term1G_reg
= spe_allocate_available_register(f
);
282 int term1B_reg
= spe_allocate_available_register(f
);
283 int term1A_reg
= spe_allocate_available_register(f
);
285 int term2R_reg
= spe_allocate_available_register(f
);
286 int term2G_reg
= spe_allocate_available_register(f
);
287 int term2B_reg
= spe_allocate_available_register(f
);
288 int term2A_reg
= spe_allocate_available_register(f
);
290 int fbR_reg
= spe_allocate_available_register(f
);
291 int fbG_reg
= spe_allocate_available_register(f
);
292 int fbB_reg
= spe_allocate_available_register(f
);
293 int fbA_reg
= spe_allocate_available_register(f
);
295 int tmp_reg
= spe_allocate_available_register(f
);
297 /* These values might or might not eventually get put into
298 * registers. We avoid allocating them and setting them until
299 * they're actually needed; then we avoid setting them more than
300 * once, and release them at the end of code generation.
302 boolean one_reg_set
= false;
304 #define SET_ONE_REG_IF_UNSET(f) if (!one_reg_set) {\
305 one_reg = spe_allocate_available_register(f); \
306 spe_load_float(f, one_reg, 1.0f); \
307 one_reg_set = true; \
309 #define RELEASE_ONE_REG_IF_USED(f) if (one_reg_set) {\
310 spe_release_register(f, one_reg); \
313 boolean const_color_set
= false;
314 int constR_reg
, constG_reg
, constB_reg
;
/* Lazily allocate registers for the constant blend color and load its
 * red, green and blue components (blend_color->color[0..2]) into them.
 * Does nothing if the registers were already set up.
 *
 * The enclosing scope must declare const_color_set, constR_reg,
 * constG_reg and constB_reg.  The macro expands to a complete
 * if-statement, so the use site needs no trailing semicolon.
 *
 * Each of the three components gets its own register; constB_reg must be
 * allocated here because it is loaded below and released by
 * RELEASE_CONST_COLOR_IF_USED.
 */
#define SET_CONST_COLOR_IF_UNSET(f, blend_color) if (!const_color_set) {\
   constR_reg = spe_allocate_available_register(f); \
   constG_reg = spe_allocate_available_register(f); \
   constB_reg = spe_allocate_available_register(f); \
   spe_load_float(f, constR_reg, (blend_color)->color[0]); \
   spe_load_float(f, constG_reg, (blend_color)->color[1]); \
   spe_load_float(f, constB_reg, (blend_color)->color[2]); \
   const_color_set = true;\
}
324 #define RELEASE_CONST_COLOR_IF_USED(f) if (const_color_set) {\
325 spe_release_register(f, constR_reg); \
326 spe_release_register(f, constG_reg); \
327 spe_release_register(f, constB_reg); \
330 boolean const_alpha_set
= false;
332 #define SET_CONST_ALPHA_IF_UNSET(f, blend_color) if (!const_alpha_set) {\
333 constA_reg = spe_allocate_available_register(f); \
334 spe_load_float(f, constA_reg, blend_color->color[3]); \
335 const_alpha_set = true; \
337 #define RELEASE_CONST_ALPHA_IF_USED(f) if (const_alpha_set) {\
338 spe_release_register(f, constA_reg); \
341 /* Real code starts here */
343 ASSERT(blend
->blend_enable
);
345 /* Unpack/convert framebuffer colors from four 32-bit packed colors
346 * (fbRGBA) to four float RGBA vectors (fbR, fbG, fbB, fbA).
347 * Each 8-bit color component is expanded into a float in [0.0, 1.0].
350 int mask_reg
= spe_allocate_available_register(f
);
352 /* mask = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff} */
353 spe_load_int(f
, mask_reg
, 0xff);
355 /* XXX there may be more clever ways to implement the following code */
356 switch (color_format
) {
357 case PIPE_FORMAT_A8R8G8B8_UNORM
:
358 /* fbB = fbB & mask */
359 spe_and(f
, fbB_reg
, fbRGBA_reg
, mask_reg
);
360 /* mask = mask << 8 */
361 spe_roti(f
, mask_reg
, mask_reg
, 8);
363 /* fbG = fbRGBA & mask */
364 spe_and(f
, fbG_reg
, fbRGBA_reg
, mask_reg
);
366 spe_roti(f
, fbG_reg
, fbG_reg
, -8);
367 /* mask = mask << 8 */
368 spe_roti(f
, mask_reg
, mask_reg
, 8);
370 /* fbR = fbRGBA & mask */
371 spe_and(f
, fbR_reg
, fbRGBA_reg
, mask_reg
);
372 /* fbR = fbR >> 16 */
373 spe_roti(f
, fbR_reg
, fbR_reg
, -16);
374 /* mask = mask << 8 */
375 spe_roti(f
, mask_reg
, mask_reg
, 8);
377 /* fbA = fbRGBA & mask */
378 spe_and(f
, fbA_reg
, fbRGBA_reg
, mask_reg
);
379 /* fbA = fbA >> 24 */
380 spe_roti(f
, fbA_reg
, fbA_reg
, -24);
383 case PIPE_FORMAT_B8G8R8A8_UNORM
:
384 /* fbA = fbA & mask */
385 spe_and(f
, fbA_reg
, fbRGBA_reg
, mask_reg
);
386 /* mask = mask << 8 */
387 spe_roti(f
, mask_reg
, mask_reg
, 8);
389 /* fbR = fbRGBA & mask */
390 spe_and(f
, fbR_reg
, fbRGBA_reg
, mask_reg
);
392 spe_roti(f
, fbR_reg
, fbR_reg
, -8);
393 /* mask = mask << 8 */
394 spe_roti(f
, mask_reg
, mask_reg
, 8);
396 /* fbG = fbRGBA & mask */
397 spe_and(f
, fbG_reg
, fbRGBA_reg
, mask_reg
);
398 /* fbG = fbG >> 16 */
399 spe_roti(f
, fbG_reg
, fbG_reg
, -16);
400 /* mask = mask << 8 */
401 spe_roti(f
, mask_reg
, mask_reg
, 8);
403 /* fbB = fbRGBA & mask */
404 spe_and(f
, fbB_reg
, fbRGBA_reg
, mask_reg
);
405 /* fbB = fbB >> 24 */
406 spe_roti(f
, fbB_reg
, fbB_reg
, -24);
413 /* convert int[4] in [0,255] to float[4] in [0.0, 1.0] */
414 spe_cuflt(f
, fbR_reg
, fbR_reg
, 8);
415 spe_cuflt(f
, fbG_reg
, fbG_reg
, 8);
416 spe_cuflt(f
, fbB_reg
, fbB_reg
, 8);
417 spe_cuflt(f
, fbA_reg
, fbA_reg
, 8);
419 spe_release_register(f
, mask_reg
);
424 * Compute Src RGB terms. We're actually looking for the value
425 * of (the appropriate RGB factors) * (the incoming source RGB color).
427 switch (blend
->rgb_src_factor
) {
428 case PIPE_BLENDFACTOR_ONE
:
429 /* factors = (1,1,1), so term = (R,G,B) */
430 spe_move(f
, term1R_reg
, fragR_reg
);
431 spe_move(f
, term1G_reg
, fragG_reg
);
432 spe_move(f
, term1B_reg
, fragB_reg
);
434 case PIPE_BLENDFACTOR_ZERO
:
435 /* factors = (0,0,0), so term = (0,0,0) */
436 spe_load_float(f
, term1R_reg
, 0.0f
);
437 spe_load_float(f
, term1G_reg
, 0.0f
);
438 spe_load_float(f
, term1B_reg
, 0.0f
);
440 case PIPE_BLENDFACTOR_SRC_COLOR
:
441 /* factors = (R,G,B), so term = (R*R, G*G, B*B) */
442 spe_fm(f
, term1R_reg
, fragR_reg
, fragR_reg
);
443 spe_fm(f
, term1G_reg
, fragG_reg
, fragG_reg
);
444 spe_fm(f
, term1B_reg
, fragB_reg
, fragB_reg
);
446 case PIPE_BLENDFACTOR_SRC_ALPHA
:
447 /* factors = (A,A,A), so term = (R*A, G*A, B*A) */
448 spe_fm(f
, term1R_reg
, fragR_reg
, fragA_reg
);
449 spe_fm(f
, term1G_reg
, fragG_reg
, fragA_reg
);
450 spe_fm(f
, term1B_reg
, fragB_reg
, fragA_reg
);
452 case PIPE_BLENDFACTOR_INV_SRC_COLOR
:
453 /* factors = (1-R,1-G,1-B), so term = (R*(1-R), G*(1-G), B*(1-B)) */
454 /* we'll need the optional constant {1,1,1,1} register */
455 SET_ONE_REG_IF_UNSET(f
)
457 spe_fs(f
, tmp_reg
, one_reg
, fragR_reg
);
459 spe_fm(f
, term1R_reg
, fragR_reg
, tmp_reg
);
460 /* repeat for G and B */
461 spe_fs(f
, tmp_reg
, one_reg
, fragG_reg
);
462 spe_fm(f
, term1G_reg
, fragG_reg
, tmp_reg
);
463 spe_fs(f
, tmp_reg
, one_reg
, fragB_reg
);
464 spe_fm(f
, term1B_reg
, fragB_reg
, tmp_reg
);
466 case PIPE_BLENDFACTOR_DST_COLOR
:
467 /* factors = (Rfb,Gfb,Bfb), so term = (R*Rfb, G*Gfb, B*Bfb) */
468 spe_fm(f
, term1R_reg
, fragR_reg
, fbR_reg
);
469 spe_fm(f
, term1G_reg
, fragG_reg
, fbG_reg
);
470 spe_fm(f
, term1B_reg
, fragB_reg
, fbB_reg
);
472 case PIPE_BLENDFACTOR_INV_DST_COLOR
:
473 /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (R*(1-Rfb),G*(1-Gfb),B*(1-Bfb)) */
474 /* we'll need the optional constant {1,1,1,1} register */
475 SET_ONE_REG_IF_UNSET(f
)
477 spe_fs(f
, tmp_reg
, one_reg
, fbR_reg
);
479 spe_fm(f
, term1R_reg
, fragR_reg
, tmp_reg
);
480 /* repeat for G and B */
481 spe_fs(f
, tmp_reg
, one_reg
, fbG_reg
);
482 spe_fm(f
, term1G_reg
, fragG_reg
, tmp_reg
);
483 spe_fs(f
, tmp_reg
, one_reg
, fbB_reg
);
484 spe_fm(f
, term1B_reg
, fragB_reg
, tmp_reg
);
486 case PIPE_BLENDFACTOR_INV_SRC_ALPHA
:
487 /* factors = (1-A,1-A,1-A), so term = (R*(1-A),G*(1-A),B*(1-A)) */
488 /* we'll need the optional constant {1,1,1,1} register */
489 SET_ONE_REG_IF_UNSET(f
)
491 spe_fs(f
, tmp_reg
, one_reg
, fragA_reg
);
493 spe_fm(f
, term1R_reg
, fragR_reg
, tmp_reg
);
494 /* repeat for G and B with the same (1-A) factor */
495 spe_fm(f
, term1G_reg
, fragG_reg
, tmp_reg
);
496 spe_fm(f
, term1B_reg
, fragB_reg
, tmp_reg
);
498 case PIPE_BLENDFACTOR_DST_ALPHA
:
499 /* factors = (Afb, Afb, Afb), so term = (R*Afb, G*Afb, B*Afb) */
500 spe_fm(f
, term1R_reg
, fragR_reg
, fbA_reg
);
501 spe_fm(f
, term1G_reg
, fragG_reg
, fbA_reg
);
502 spe_fm(f
, term1B_reg
, fragB_reg
, fbA_reg
);
504 case PIPE_BLENDFACTOR_INV_DST_ALPHA
:
505 /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (R*(1-Afb),G*(1-Afb),B*(1-Afb)) */
506 /* we'll need the optional constant {1,1,1,1} register */
507 SET_ONE_REG_IF_UNSET(f
)
509 spe_fs(f
, tmp_reg
, one_reg
, fbA_reg
);
510 /* term = R * tmp, G*tmp, and B*tmp */
511 spe_fm(f
, term1R_reg
, fragR_reg
, tmp_reg
);
512 spe_fm(f
, term1G_reg
, fragG_reg
, tmp_reg
);
513 spe_fm(f
, term1B_reg
, fragB_reg
, tmp_reg
);
515 case PIPE_BLENDFACTOR_CONST_COLOR
:
516 /* We'll need the optional blend color registers */
517 SET_CONST_COLOR_IF_UNSET(f
,blend_color
)
518 /* now, factor = (Rc,Gc,Bc), so term = (R*Rc,G*Gc,B*Bc) */
519 spe_fm(f
, term1R_reg
, fragR_reg
, constR_reg
);
520 spe_fm(f
, term1G_reg
, fragG_reg
, constG_reg
);
521 spe_fm(f
, term1B_reg
, fragB_reg
, constB_reg
);
523 case PIPE_BLENDFACTOR_CONST_ALPHA
:
524 /* we'll need the optional constant alpha register */
525 SET_CONST_ALPHA_IF_UNSET(f
, blend_color
)
526 /* factor = (Ac,Ac,Ac), so term = (R*Ac,G*Ac,B*Ac) */
527 spe_fm(f
, term1R_reg
, fragR_reg
, constA_reg
);
528 spe_fm(f
, term1G_reg
, fragG_reg
, constA_reg
);
529 spe_fm(f
, term1B_reg
, fragB_reg
, constA_reg
);
531 case PIPE_BLENDFACTOR_INV_CONST_COLOR
:
532 /* We need both the optional {1,1,1,1} register, and the optional
533 * constant color registers
535 SET_ONE_REG_IF_UNSET(f
)
536 SET_CONST_COLOR_IF_UNSET(f
, blend_color
)
537 /* factor = (1-Rc,1-Gc,1-Bc), so term = (R*(1-Rc),G*(1-Gc),B*(1-Bc)) */
538 spe_fs(f
, tmp_reg
, one_reg
, constR_reg
);
539 spe_fm(f
, term1R_reg
, fragR_reg
, tmp_reg
);
540 spe_fs(f
, tmp_reg
, one_reg
, constG_reg
);
541 spe_fm(f
, term1G_reg
, fragG_reg
, tmp_reg
);
542 spe_fs(f
, tmp_reg
, one_reg
, constB_reg
);
543 spe_fm(f
, term1B_reg
, fragB_reg
, tmp_reg
);
545 case PIPE_BLENDFACTOR_INV_CONST_ALPHA
:
546 /* We need the optional {1,1,1,1} register and the optional
547 * constant alpha register
549 SET_ONE_REG_IF_UNSET(f
)
550 SET_CONST_ALPHA_IF_UNSET(f
, blend_color
)
551 /* factor = (1-Ac,1-Ac,1-Ac), so term = (R*(1-Ac),G*(1-Ac),B*(1-Ac)) */
552 spe_fs(f
, tmp_reg
, one_reg
, constA_reg
);
553 spe_fm(f
, term1R_reg
, fragR_reg
, tmp_reg
);
554 spe_fm(f
, term1G_reg
, fragG_reg
, tmp_reg
);
555 spe_fm(f
, term1B_reg
, fragB_reg
, tmp_reg
);
557 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE
:
558 /* We'll need the optional {1,1,1,1} register */
559 SET_ONE_REG_IF_UNSET(f
)
560 /* factor = (min(A,1-Afb),min(A,1-Afb),min(A,1-Afb)), so
561 * term = (R*min(A,1-Afb), G*min(A,1-Afb), B*min(A,1-Afb))
564 spe_fs(f
, tmp_reg
, one_reg
, fbA_reg
);
565 /* tmp = min(A,tmp) */
566 FLOAT_VECTOR_MIN(f
, tmp_reg
, fragA_reg
, tmp_reg
)
568 spe_fm(f
, term1R_reg
, fragR_reg
, tmp_reg
);
569 spe_fm(f
, term1G_reg
, fragG_reg
, tmp_reg
);
570 spe_fm(f
, term1B_reg
, fragB_reg
, tmp_reg
);
573 /* non-OpenGL cases? */
574 case PIPE_BLENDFACTOR_SRC1_COLOR
:
575 case PIPE_BLENDFACTOR_SRC1_ALPHA
:
576 case PIPE_BLENDFACTOR_INV_SRC1_COLOR
:
577 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA
:
584 * Compute Src Alpha term
586 switch (blend
->alpha_src_factor
) {
587 case PIPE_BLENDFACTOR_ONE
:
588 spe_move(f
, term1A_reg
, fragA_reg
);
590 case PIPE_BLENDFACTOR_SRC_COLOR
:
591 spe_fm(f
, term1A_reg
, fragA_reg
, fragA_reg
);
593 case PIPE_BLENDFACTOR_SRC_ALPHA
:
594 spe_fm(f
, term1A_reg
, fragA_reg
, fragA_reg
);
602 * Compute Dest RGB terms
604 switch (blend
->rgb_dst_factor
) {
605 case PIPE_BLENDFACTOR_ONE
:
606 spe_move(f
, term2R_reg
, fbR_reg
);
607 spe_move(f
, term2G_reg
, fbG_reg
);
608 spe_move(f
, term2B_reg
, fbB_reg
);
610 case PIPE_BLENDFACTOR_ZERO
:
611 spe_zero(f
, term2R_reg
);
612 spe_zero(f
, term2G_reg
);
613 spe_zero(f
, term2B_reg
);
615 case PIPE_BLENDFACTOR_SRC_COLOR
:
616 spe_fm(f
, term2R_reg
, fbR_reg
, fragR_reg
);
617 spe_fm(f
, term2G_reg
, fbG_reg
, fragG_reg
);
618 spe_fm(f
, term2B_reg
, fbB_reg
, fragB_reg
);
620 case PIPE_BLENDFACTOR_SRC_ALPHA
:
621 spe_fm(f
, term2R_reg
, fbR_reg
, fragA_reg
);
622 spe_fm(f
, term2G_reg
, fbG_reg
, fragA_reg
);
623 spe_fm(f
, term2B_reg
, fbB_reg
, fragA_reg
);
625 case PIPE_BLENDFACTOR_INV_SRC_ALPHA
:
627 /* one = {1.0, 1.0, 1.0, 1.0} */
629 one_reg
= spe_allocate_available_register(f
);
630 spe_load_float(f
, one_reg
, 1.0f
);
633 /* tmp = one - fragA */
634 spe_fs(f
, tmp_reg
, one_reg
, fragA_reg
);
635 /* term = fb * tmp */
636 spe_fm(f
, term2R_reg
, fbR_reg
, tmp_reg
);
637 spe_fm(f
, term2G_reg
, fbG_reg
, tmp_reg
);
638 spe_fm(f
, term2B_reg
, fbB_reg
, tmp_reg
);
640 /* Compute: term2x = fbx * (1.0 - fragA)
641 * Which is: term2x = fbx - fbx * fragA
642 * Use fnms t,a,b,c which computes t=c-a*b
644 spe_fnms(f
, term2R_reg
, fbR_reg
, fragA_reg
, fbR_reg
);
645 spe_fnms(f
, term2G_reg
, fbG_reg
, fragA_reg
, fbG_reg
);
646 spe_fnms(f
, term2B_reg
, fbB_reg
, fragA_reg
, fbB_reg
);
650 // GL_ONE_MINUS_SRC_COLOR
652 // GL_ONE_MINUS_DST_COLOR
655 // GL_ONE_MINUS_CONSTANT_COLOR
657 // GL_ONE_MINUS_CONSTANT_ALPHA
663 * Compute Dest Alpha term
665 switch (blend
->alpha_dst_factor
) {
666 case PIPE_BLENDFACTOR_ONE
:
667 spe_move(f
, term2A_reg
, fbA_reg
);
669 case PIPE_BLENDFACTOR_ZERO
:
670 spe_zero(f
, term2A_reg
);
672 case PIPE_BLENDFACTOR_SRC_ALPHA
:
673 spe_fm(f
, term2A_reg
, fbA_reg
, fragA_reg
);
675 case PIPE_BLENDFACTOR_INV_SRC_ALPHA
:
677 /* one = {1.0, 1.0, 1.0, 1.0} */
679 one_reg
= spe_allocate_available_register(f
);
680 spe_load_float(f
, one_reg
, 1.0f
);
683 /* tmp = one - fragA */
684 spe_fs(f
, tmp_reg
, one_reg
, fragA_reg
);
685 /* termA = fbA * tmp */
686 spe_fm(f
, term2A_reg
, fbA_reg
, tmp_reg
);
688 /* Compute: term2A = fbA * (1.0 - fragA)
689 * Which is: term2A = fbA - fbA * fragA
690 * Use fnms t,a,b,c which computes t=c-a*b
692 spe_fnms(f
, term2A_reg
, fbA_reg
, fragA_reg
, fbA_reg
);
696 // GL_ONE_MINUS_SRC_COLOR
698 // GL_ONE_MINUS_DST_COLOR
701 // GL_ONE_MINUS_CONSTANT_COLOR
703 // GL_ONE_MINUS_CONSTANT_ALPHA
709 * Combine Src/Dest RGB terms
711 switch (blend
->rgb_func
) {
713 spe_fa(f
, fragR_reg
, term1R_reg
, term2R_reg
);
714 spe_fa(f
, fragG_reg
, term1G_reg
, term2G_reg
);
715 spe_fa(f
, fragB_reg
, term1B_reg
, term2B_reg
);
717 case PIPE_BLEND_SUBTRACT
:
718 spe_fs(f
, fragR_reg
, term1R_reg
, term2R_reg
);
719 spe_fs(f
, fragG_reg
, term1G_reg
, term2G_reg
);
720 spe_fs(f
, fragB_reg
, term1B_reg
, term2B_reg
);
722 case PIPE_BLEND_REVERSE_SUBTRACT
:
723 spe_fs(f
, fragR_reg
, term2R_reg
, term1R_reg
);
724 spe_fs(f
, fragG_reg
, term2G_reg
, term1G_reg
);
725 spe_fs(f
, fragB_reg
, term2B_reg
, term1B_reg
);
728 FLOAT_VECTOR_MIN(f
, fragR_reg
, term1R_reg
, term2R_reg
)
729 FLOAT_VECTOR_MIN(f
, fragG_reg
, term1G_reg
, term2G_reg
)
730 FLOAT_VECTOR_MIN(f
, fragB_reg
, term1B_reg
, term2B_reg
)
733 FLOAT_VECTOR_MAX(f
, fragR_reg
, term1R_reg
, term2R_reg
)
734 FLOAT_VECTOR_MAX(f
, fragG_reg
, term1G_reg
, term2G_reg
)
735 FLOAT_VECTOR_MAX(f
, fragB_reg
, term1B_reg
, term2B_reg
)
742 * Combine Src/Dest A term
744 switch (blend
->alpha_func
) {
746 spe_fa(f
, fragA_reg
, term1A_reg
, term2A_reg
);
748 case PIPE_BLEND_SUBTRACT
:
749 spe_fs(f
, fragA_reg
, term1A_reg
, term2A_reg
);
751 case PIPE_BLEND_REVERSE_SUBTRACT
:
752 spe_fs(f
, fragA_reg
, term2A_reg
, term1A_reg
);
755 FLOAT_VECTOR_MIN(f
, fragA_reg
, term1A_reg
, term2A_reg
)
758 FLOAT_VECTOR_MAX(f
, fragA_reg
, term1A_reg
, term2A_reg
)
764 spe_release_register(f
, term1R_reg
);
765 spe_release_register(f
, term1G_reg
);
766 spe_release_register(f
, term1B_reg
);
767 spe_release_register(f
, term1A_reg
);
769 spe_release_register(f
, term2R_reg
);
770 spe_release_register(f
, term2G_reg
);
771 spe_release_register(f
, term2B_reg
);
772 spe_release_register(f
, term2A_reg
);
774 spe_release_register(f
, fbR_reg
);
775 spe_release_register(f
, fbG_reg
);
776 spe_release_register(f
, fbB_reg
);
777 spe_release_register(f
, fbA_reg
);
779 spe_release_register(f
, tmp_reg
);
781 /* Free any optional registers that actually got used */
782 RELEASE_ONE_REG_IF_USED(f
)
783 RELEASE_CONST_COLOR_IF_USED(f
)
784 RELEASE_CONST_ALPHA_IF_USED(f
)
789 gen_logicop(const struct pipe_blend_state
*blend
,
790 struct spe_function
*f
,
791 int fragRGBA_reg
, int fbRGBA_reg
)
794 /* operate on 32-bit packed pixels, not float colors */
799 gen_colormask(uint colormask
,
800 struct spe_function
*f
,
801 int fragRGBA_reg
, int fbRGBA_reg
)
804 /* operate on 32-bit packed pixels, not float colors */
810 * Generate code to pack a quad of float colors into four 32-bit integers.
812 * \param f SPE function to append instruction onto.
813 * \param color_format the dest color packing format
814 * \param r_reg register containing four red values (in/clobbered)
815 * \param g_reg register containing four green values (in/clobbered)
816 * \param b_reg register containing four blue values (in/clobbered)
817 * \param a_reg register containing four alpha values (in/clobbered)
818 * \param rgba_reg register to store the packed RGBA colors (out)
821 gen_pack_colors(struct spe_function
*f
,
822 enum pipe_format color_format
,
823 int r_reg
, int g_reg
, int b_reg
, int a_reg
,
826 /* Convert float[4] in [0.0,1.0] to int[4] in [0,~0], with clamping */
827 spe_cfltu(f
, r_reg
, r_reg
, 32);
828 spe_cfltu(f
, g_reg
, g_reg
, 32);
829 spe_cfltu(f
, b_reg
, b_reg
, 32);
830 spe_cfltu(f
, a_reg
, a_reg
, 32);
832 /* Shift the most significant bytes to the least significant positions.
833 * I.e.: reg = reg >> 24
835 spe_rotmi(f
, r_reg
, r_reg
, -24);
836 spe_rotmi(f
, g_reg
, g_reg
, -24);
837 spe_rotmi(f
, b_reg
, b_reg
, -24);
838 spe_rotmi(f
, a_reg
, a_reg
, -24);
840 /* Shift the color bytes according to the surface format */
841 if (color_format
== PIPE_FORMAT_A8R8G8B8_UNORM
) {
842 spe_roti(f
, g_reg
, g_reg
, 8); /* green <<= 8 */
843 spe_roti(f
, r_reg
, r_reg
, 16); /* red <<= 16 */
844 spe_roti(f
, a_reg
, a_reg
, 24); /* alpha <<= 24 */
846 else if (color_format
== PIPE_FORMAT_B8G8R8A8_UNORM
) {
847 spe_roti(f
, r_reg
, r_reg
, 8); /* red <<= 8 */
848 spe_roti(f
, g_reg
, g_reg
, 16); /* green <<= 16 */
849 spe_roti(f
, b_reg
, b_reg
, 24); /* blue <<= 24 */
855 /* Merge red, green, blue, alpha registers to make packed RGBA colors.
856 * Eg: after shifting according to color_format we might have:
857 * R = {0x00ff0000, 0x00110000, 0x00220000, 0x00330000}
858 * G = {0x0000ff00, 0x00004400, 0x00005500, 0x00006600}
859 * B = {0x000000ff, 0x00000077, 0x00000088, 0x00000099}
860 * A = {0xff000000, 0xaa000000, 0xbb000000, 0xcc000000}
861 * OR-ing all those together gives us four packed colors:
862 * RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699}
864 spe_or(f
, rgba_reg
, r_reg
, g_reg
);
865 spe_or(f
, rgba_reg
, rgba_reg
, b_reg
);
866 spe_or(f
, rgba_reg
, rgba_reg
, a_reg
);
873 * Generate SPE code to implement the fragment operations (alpha test,
874 * depth test, stencil test, blending, colormask, and final
875 * framebuffer write) as specified by the current context state.
877 * Logically, this code will be called after running the fragment
878 * shader. But under some circumstances we could run some of this
879 * code before the fragment shader to cull fragments/quads that are
880 * totally occluded/discarded.
882 * XXX we only support PIPE_FORMAT_Z24S8_UNORM z/stencil buffer right now.
884 * See the spu_default_fragment_ops() function to see how the per-fragment
885 * operations would be done with ordinary C code.
886 * The code we generate here though has no branches, is SIMD, etc and
887 * should be much faster.
889 * \param cell the rendering context (in)
890 * \param f the generated function (out)
893 cell_gen_fragment_function(struct cell_context
*cell
, struct spe_function
*f
)
895 const struct pipe_depth_stencil_alpha_state
*dsa
=
896 &cell
->depth_stencil
->base
;
897 const struct pipe_blend_state
*blend
= &cell
->blend
->base
;
898 const struct pipe_blend_color
*blend_color
= &cell
->blend_color
;
899 const enum pipe_format color_format
= cell
->framebuffer
.cbufs
[0]->format
;
901 /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
902 const int x_reg
= 3; /* uint */
903 const int y_reg
= 4; /* uint */
904 const int color_tile_reg
= 5; /* tile_t * */
905 const int depth_tile_reg
= 6; /* tile_t * */
906 const int fragZ_reg
= 7; /* vector float */
907 const int fragR_reg
= 8; /* vector float */
908 const int fragG_reg
= 9; /* vector float */
909 const int fragB_reg
= 10; /* vector float */
910 const int fragA_reg
= 11; /* vector float */
911 const int mask_reg
= 12; /* vector uint */
913 /* offset of quad from start of tile
914 * XXX assuming 4-byte pixels for color AND Z/stencil!!!!
918 int fbRGBA_reg
; /**< framebuffer's RGBA colors for quad */
919 int fbZS_reg
; /**< framebuffer's combined z/stencil values for quad */
921 spe_init_func(f
, SPU_MAX_FRAGMENT_OPS_INSTS
* SPE_INST_SIZE
);
923 if (cell
->debug_flags
& CELL_DEBUG_ASM
) {
924 spe_print_code(f
, true);
926 spe_comment(f
, -4, "Begin per-fragment ops");
929 spe_allocate_register(f
, x_reg
);
930 spe_allocate_register(f
, y_reg
);
931 spe_allocate_register(f
, color_tile_reg
);
932 spe_allocate_register(f
, depth_tile_reg
);
933 spe_allocate_register(f
, fragZ_reg
);
934 spe_allocate_register(f
, fragR_reg
);
935 spe_allocate_register(f
, fragG_reg
);
936 spe_allocate_register(f
, fragB_reg
);
937 spe_allocate_register(f
, fragA_reg
);
938 spe_allocate_register(f
, mask_reg
);
940 quad_offset_reg
= spe_allocate_available_register(f
);
941 fbRGBA_reg
= spe_allocate_available_register(f
);
942 fbZS_reg
= spe_allocate_available_register(f
);
944 /* compute offset of quad from start of tile, in bytes */
946 int x2_reg
= spe_allocate_available_register(f
);
947 int y2_reg
= spe_allocate_available_register(f
);
949 ASSERT(TILE_SIZE
== 32);
951 spe_rotmi(f
, x2_reg
, x_reg
, -1); /* x2 = x / 2 */
952 spe_rotmi(f
, y2_reg
, y_reg
, -1); /* y2 = y / 2 */
953 spe_shli(f
, y2_reg
, y2_reg
, 4); /* y2 *= 16 */
954 spe_a(f
, quad_offset_reg
, y2_reg
, x2_reg
); /* offset = y2 + x2 */
955 spe_shli(f
, quad_offset_reg
, quad_offset_reg
, 4); /* offset *= 16 */
957 spe_release_register(f
, x2_reg
);
958 spe_release_register(f
, y2_reg
);
962 if (dsa
->alpha
.enabled
) {
963 gen_alpha_test(dsa
, f
, mask_reg
, fragA_reg
);
966 if (dsa
->depth
.enabled
|| dsa
->stencil
[0].enabled
) {
967 const enum pipe_format zs_format
= cell
->framebuffer
.zsbuf
->format
;
968 boolean write_depth_stencil
;
970 int fbZ_reg
= spe_allocate_available_register(f
); /* Z values */
971 int fbS_reg
= spe_allocate_available_register(f
); /* Stencil values */
973 /* fetch quad of depth/stencil values from tile at (x,y) */
974 /* Load: fbZS_reg = memory[depth_tile_reg + quad_offset_reg] */
975 spe_lqx(f
, fbZS_reg
, depth_tile_reg
, quad_offset_reg
);
977 if (dsa
->depth
.enabled
) {
978 /* Extract Z bits from fbZS_reg into fbZ_reg */
979 if (zs_format
== PIPE_FORMAT_S8Z24_UNORM
||
980 zs_format
== PIPE_FORMAT_X8Z24_UNORM
) {
981 int mask_reg
= spe_allocate_available_register(f
);
982 spe_fsmbi(f
, mask_reg
, 0x7777); /* mask[0,1,2,3] = 0x00ffffff */
983 spe_and(f
, fbZ_reg
, fbZS_reg
, mask_reg
); /* fbZ = fbZS & mask */
984 spe_release_register(f
, mask_reg
);
985 /* OK, fbZ_reg has four 24-bit Z values now */
988 /* XXX handle other z/stencil formats */
992 /* Convert fragZ values from float[4] to uint[4] */
993 if (zs_format
== PIPE_FORMAT_S8Z24_UNORM
||
994 zs_format
== PIPE_FORMAT_X8Z24_UNORM
||
995 zs_format
== PIPE_FORMAT_Z24S8_UNORM
||
996 zs_format
== PIPE_FORMAT_Z24X8_UNORM
) {
997 /* 24-bit Z values */
998 int scale_reg
= spe_allocate_available_register(f
);
1000 /* scale_reg[0,1,2,3] = float(2^24-1) */
1001 spe_load_float(f
, scale_reg
, (float) 0xffffff);
1003 /* XXX these two instructions might be combined */
1004 spe_fm(f
, fragZ_reg
, fragZ_reg
, scale_reg
); /* fragZ *= scale */
1005 spe_cfltu(f
, fragZ_reg
, fragZ_reg
, 0); /* fragZ = (int) fragZ */
1007 spe_release_register(f
, scale_reg
);
1010 /* XXX handle 16-bit Z format */
1015 if (dsa
->stencil
[0].enabled
) {
1016 /* Extract stencil bits from fbZS_reg into fbS_reg */
1017 if (zs_format
== PIPE_FORMAT_S8Z24_UNORM
||
1018 zs_format
== PIPE_FORMAT_X8Z24_UNORM
) {
1019 /* XXX extract with a shift */
1022 else if (zs_format
== PIPE_FORMAT_Z24S8_UNORM
||
1023 zs_format
== PIPE_FORMAT_Z24X8_UNORM
) {
1024 /* XXX extract with a mask */
1030 if (dsa
->stencil
[0].enabled
) {
1031 /* XXX this may involve depth testing too */
1032 // gen_stencil_test(dsa, f, ... );
1035 else if (dsa
->depth
.enabled
) {
1036 int zmask_reg
= spe_allocate_available_register(f
);
1037 gen_depth_test(dsa
, f
, mask_reg
, fragZ_reg
, fbZ_reg
, zmask_reg
);
1038 spe_release_register(f
, zmask_reg
);
1041 /* do we need to write Z and/or Stencil back into framebuffer? */
1042 write_depth_stencil
= (dsa
->depth
.writemask
|
1043 dsa
->stencil
[0].write_mask
|
1044 dsa
->stencil
[1].write_mask
);
1046 if (write_depth_stencil
) {
1047 /* Merge latest Z and Stencil values into fbZS_reg.
1048 * fbZ_reg has four Z vals in bits [23..0] or bits [15..0].
1049 * fbS_reg has four 8-bit stencil values in bits [7..0].
1051 if (zs_format
== PIPE_FORMAT_S8Z24_UNORM
||
1052 zs_format
== PIPE_FORMAT_X8Z24_UNORM
) {
1053 spe_shli(f
, fbS_reg
, fbS_reg
, 24); /* fbS = fbS << 24 */
1054 spe_or(f
, fbZS_reg
, fbS_reg
, fbZ_reg
); /* fbZS = fbS | fbZ */
1056 else if (zs_format
== PIPE_FORMAT_S8Z24_UNORM
||
1057 zs_format
== PIPE_FORMAT_X8Z24_UNORM
) {
1061 else if (zs_format
== PIPE_FORMAT_Z16_UNORM
) {
1065 else if (zs_format
== PIPE_FORMAT_S8_UNORM
) {
1074 /* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */
1075 spe_stqx(f
, fbZS_reg
, depth_tile_reg
, quad_offset_reg
);
1078 spe_release_register(f
, fbZ_reg
);
1079 spe_release_register(f
, fbS_reg
);
1083 /* Get framebuffer quad/colors. We'll need these for blending,
1084 * color masking, and to obey the quad/pixel mask.
1085 * Load: fbRGBA_reg = memory[color_tile + quad_offset]
1086 * Note: if mask={~0,~0,~0,~0} and we're not blending or colormasking
1087 * we could skip this load.
1089 spe_lqx(f
, fbRGBA_reg
, color_tile_reg
, quad_offset_reg
);
1092 if (blend
->blend_enable
) {
1093 gen_blend(blend
, blend_color
, f
, color_format
,
1094 fragR_reg
, fragG_reg
, fragB_reg
, fragA_reg
, fbRGBA_reg
);
1098 * Write fragment colors to framebuffer/tile.
1099 * This involves converting the fragment colors from float[4] to the
1100 * tile's specific format and obeying the quad/pixel mask.
1103 int rgba_reg
= spe_allocate_available_register(f
);
1105 /* Pack four float colors as four 32-bit int colors */
1106 gen_pack_colors(f
, color_format
,
1107 fragR_reg
, fragG_reg
, fragB_reg
, fragA_reg
,
1110 if (blend
->logicop_enable
) {
1111 gen_logicop(blend
, f
, rgba_reg
, fbRGBA_reg
);
1114 if (blend
->colormask
!= 0xf) {
1115 gen_colormask(blend
->colormask
, f
, rgba_reg
, fbRGBA_reg
);
1119 /* Mix fragment colors with framebuffer colors using the quad/pixel mask:
1121 * rgba[i] = rgba[i];
1123 * rgba[i] = framebuffer[i];
1125 spe_selb(f
, rgba_reg
, fbRGBA_reg
, rgba_reg
, mask_reg
);
1127 /* Store updated quad in tile:
1128 * memory[color_tile + quad_offset] = rgba_reg;
1130 spe_stqx(f
, rgba_reg
, color_tile_reg
, quad_offset_reg
);
1132 spe_release_register(f
, rgba_reg
);
1135 //printf("gen_fragment_ops nr instructions: %u\n", f->num_inst);
1137 spe_bi(f
, SPE_REG_RA
, 0, 0); /* return from function call */
1140 spe_release_register(f
, fbRGBA_reg
);
1141 spe_release_register(f
, fbZS_reg
);
1142 spe_release_register(f
, quad_offset_reg
);
1144 if (cell
->debug_flags
& CELL_DEBUG_ASM
) {
1145 spe_comment(f
, -4, "End per-fragment ops");