2c80dd712e6feb597d5f98518bd0836f7e756271
[mesa.git] / src / gallium / drivers / cell / ppu / cell_gen_fragment.c
1 /**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29
30 /**
31 * Generate SPU per-fragment code (actually per-quad code).
32 * \author Brian Paul
33 */
34
35
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "rtasm/rtasm_ppc_spe.h"
39 #include "cell_context.h"
40 #include "cell_gen_fragment.h"
41
42
43
/**
 * Do extra optimizations?
 * When non-zero, gen_alpha_test() appends an early-return sequence that
 * leaves the generated function as soon as the whole quad has been killed.
 */
#define OPTIMIZATIONS 1
46
47
48 /**
49 * Generate SPE code to perform Z/depth testing.
50 *
51 * \param dsa Gallium depth/stencil/alpha state to gen code for
52 * \param f SPE function to append instruction onto.
53 * \param mask_reg register containing quad/pixel "alive" mask (in/out)
54 * \param ifragZ_reg register containing integer fragment Z values (in)
55 * \param ifbZ_reg register containing integer frame buffer Z values (in/out)
56 * \param zmask_reg register containing result of Z test/comparison (out)
57 */
58 static void
59 gen_depth_test(const struct pipe_depth_stencil_alpha_state *dsa,
60 struct spe_function *f,
61 int mask_reg, int ifragZ_reg, int ifbZ_reg, int zmask_reg)
62 {
63 ASSERT(dsa->depth.enabled);
64
65 switch (dsa->depth.func) {
66 case PIPE_FUNC_EQUAL:
67 /* zmask = (ifragZ == ref) */
68 spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg);
69 /* mask = (mask & zmask) */
70 spe_and(f, mask_reg, mask_reg, zmask_reg);
71 break;
72
73 case PIPE_FUNC_NOTEQUAL:
74 /* zmask = (ifragZ == ref) */
75 spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg);
76 /* mask = (mask & ~zmask) */
77 spe_andc(f, mask_reg, mask_reg, zmask_reg);
78 break;
79
80 case PIPE_FUNC_GREATER:
81 /* zmask = (ifragZ > ref) */
82 spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg);
83 /* mask = (mask & zmask) */
84 spe_and(f, mask_reg, mask_reg, zmask_reg);
85 break;
86
87 case PIPE_FUNC_LESS:
88 /* zmask = (ref > ifragZ) */
89 spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg);
90 /* mask = (mask & zmask) */
91 spe_and(f, mask_reg, mask_reg, zmask_reg);
92 break;
93
94 case PIPE_FUNC_LEQUAL:
95 /* zmask = (ifragZ > ref) */
96 spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg);
97 /* mask = (mask & ~zmask) */
98 spe_andc(f, mask_reg, mask_reg, zmask_reg);
99 break;
100
101 case PIPE_FUNC_GEQUAL:
102 /* zmask = (ref > ifragZ) */
103 spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg);
104 /* mask = (mask & ~zmask) */
105 spe_andc(f, mask_reg, mask_reg, zmask_reg);
106 break;
107
108 case PIPE_FUNC_NEVER:
109 spe_il(f, mask_reg, 0); /* mask = {0,0,0,0} */
110 spe_move(f, zmask_reg, mask_reg); /* zmask = mask */
111 break;
112
113 case PIPE_FUNC_ALWAYS:
114 /* mask unchanged */
115 spe_il(f, zmask_reg, ~0); /* zmask = {~0,~0,~0,~0} */
116 break;
117
118 default:
119 ASSERT(0);
120 break;
121 }
122
123 if (dsa->depth.writemask) {
124 /*
125 * If (ztest passed) {
126 * framebufferZ = fragmentZ;
127 * }
128 * OR,
129 * framebufferZ = (ztest_passed ? fragmentZ : framebufferZ;
130 */
131 spe_selb(f, ifbZ_reg, ifbZ_reg, ifragZ_reg, mask_reg);
132 }
133 }
134
135
136 /**
137 * Generate SPE code to perform alpha testing.
138 *
139 * \param dsa Gallium depth/stencil/alpha state to gen code for
140 * \param f SPE function to append instruction onto.
141 * \param mask_reg register containing quad/pixel "alive" mask (in/out)
142 * \param fragA_reg register containing four fragment alpha values (in)
143 */
144 static void
145 gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa,
146 struct spe_function *f, int mask_reg, int fragA_reg)
147 {
148 int ref_reg = spe_allocate_available_register(f);
149 int amask_reg = spe_allocate_available_register(f);
150
151 ASSERT(dsa->alpha.enabled);
152
153 if ((dsa->alpha.func != PIPE_FUNC_NEVER) &&
154 (dsa->alpha.func != PIPE_FUNC_ALWAYS)) {
155 /* load/splat the alpha reference float value */
156 spe_load_float(f, ref_reg, dsa->alpha.ref);
157 }
158
159 /* emit code to do the alpha comparison, updating 'mask' */
160 switch (dsa->alpha.func) {
161 case PIPE_FUNC_EQUAL:
162 /* amask = (fragA == ref) */
163 spe_fceq(f, amask_reg, fragA_reg, ref_reg);
164 /* mask = (mask & amask) */
165 spe_and(f, mask_reg, mask_reg, amask_reg);
166 break;
167
168 case PIPE_FUNC_NOTEQUAL:
169 /* amask = (fragA == ref) */
170 spe_fceq(f, amask_reg, fragA_reg, ref_reg);
171 /* mask = (mask & ~amask) */
172 spe_andc(f, mask_reg, mask_reg, amask_reg);
173 break;
174
175 case PIPE_FUNC_GREATER:
176 /* amask = (fragA > ref) */
177 spe_fcgt(f, amask_reg, fragA_reg, ref_reg);
178 /* mask = (mask & amask) */
179 spe_and(f, mask_reg, mask_reg, amask_reg);
180 break;
181
182 case PIPE_FUNC_LESS:
183 /* amask = (ref > fragA) */
184 spe_fcgt(f, amask_reg, ref_reg, fragA_reg);
185 /* mask = (mask & amask) */
186 spe_and(f, mask_reg, mask_reg, amask_reg);
187 break;
188
189 case PIPE_FUNC_LEQUAL:
190 /* amask = (fragA > ref) */
191 spe_fcgt(f, amask_reg, fragA_reg, ref_reg);
192 /* mask = (mask & ~amask) */
193 spe_andc(f, mask_reg, mask_reg, amask_reg);
194 break;
195
196 case PIPE_FUNC_GEQUAL:
197 /* amask = (ref > fragA) */
198 spe_fcgt(f, amask_reg, ref_reg, fragA_reg);
199 /* mask = (mask & ~amask) */
200 spe_andc(f, mask_reg, mask_reg, amask_reg);
201 break;
202
203 case PIPE_FUNC_NEVER:
204 spe_il(f, mask_reg, 0); /* mask = [0,0,0,0] */
205 break;
206
207 case PIPE_FUNC_ALWAYS:
208 /* no-op, mask unchanged */
209 break;
210
211 default:
212 ASSERT(0);
213 break;
214 }
215
216 #if OPTIMIZATIONS
217 /* if mask == {0,0,0,0} we're all done, return */
218 {
219 /* re-use amask reg here */
220 int tmp_reg = amask_reg;
221 /* tmp[0] = (mask[0] | mask[1] | mask[2] | mask[3]) */
222 spe_orx(f, tmp_reg, mask_reg);
223 /* if tmp[0] == 0 then return from function call */
224 spe_biz(f, tmp_reg, SPE_REG_RA, 0, 0);
225 }
226 #endif
227
228 spe_release_register(f, ref_reg);
229 spe_release_register(f, amask_reg);
230 }
231
/* Emit code computing the per-element minimum of two float vectors:
 *    result_reg = min(reg1, reg2)
 *
 * How it works:
 *   - Float Compare Greater Than (fcgt) fills the scratch register with
 *     1 bits in every element where reg1 > reg2 and 0 bits elsewhere.
 *   - Select Bits (selb) then copies bits from reg1 where the scratch
 *     register holds 0 and from reg2 where it holds 1.
 * So each result element is reg1 when reg1 <= reg2 and reg2 when
 * reg1 > reg2, i.e. the smaller of the two.
 *
 * The expansion is a complete brace-enclosed block; existing call sites
 * invoke it without a trailing semicolon.
 */
#define FLOAT_VECTOR_MIN(f, result_reg, reg1, reg2) {\
   int fvmin_gt_reg = spe_allocate_available_register(f); \
   spe_fcgt((f), fvmin_gt_reg, (reg1), (reg2)); \
   spe_selb((f), (result_reg), (reg1), (reg2), fvmin_gt_reg); \
   spe_release_register((f), fvmin_gt_reg); \
}
251
/* Emit code computing the per-element maximum of two float vectors:
 *    result_reg = max(reg1, reg2)
 *
 * fcgt fills the scratch register with 1 bits where reg1 > reg2; selb then
 * takes bits from reg2 where the scratch register is 0 and from reg1 where
 * it is 1.  This is the FLOAT_VECTOR_MIN sequence with the two selb source
 * operands swapped.
 *
 * The expansion is a complete brace-enclosed block; existing call sites
 * invoke it without a trailing semicolon.
 */
#define FLOAT_VECTOR_MAX(f, result_reg, reg1, reg2) {\
   int fvmax_gt_reg = spe_allocate_available_register(f); \
   spe_fcgt((f), fvmax_gt_reg, (reg1), (reg2)); \
   spe_selb((f), (result_reg), (reg2), (reg1), fvmax_gt_reg); \
   spe_release_register((f), fvmax_gt_reg); \
}
262
263 /**
264 * Generate SPE code to implement the given blend mode for a quad of pixels.
265 * \param f SPE function to append instruction onto.
266 * \param fragR_reg register with fragment red values (float) (in/out)
267 * \param fragG_reg register with fragment green values (float) (in/out)
268 * \param fragB_reg register with fragment blue values (float) (in/out)
269 * \param fragA_reg register with fragment alpha values (float) (in/out)
270 * \param fbRGBA_reg register with packed framebuffer colors (integer) (in)
271 */
272 static void
273 gen_blend(const struct pipe_blend_state *blend,
274 const struct pipe_blend_color *blend_color,
275 struct spe_function *f,
276 enum pipe_format color_format,
277 int fragR_reg, int fragG_reg, int fragB_reg, int fragA_reg,
278 int fbRGBA_reg)
279 {
280 int term1R_reg = spe_allocate_available_register(f);
281 int term1G_reg = spe_allocate_available_register(f);
282 int term1B_reg = spe_allocate_available_register(f);
283 int term1A_reg = spe_allocate_available_register(f);
284
285 int term2R_reg = spe_allocate_available_register(f);
286 int term2G_reg = spe_allocate_available_register(f);
287 int term2B_reg = spe_allocate_available_register(f);
288 int term2A_reg = spe_allocate_available_register(f);
289
290 int fbR_reg = spe_allocate_available_register(f);
291 int fbG_reg = spe_allocate_available_register(f);
292 int fbB_reg = spe_allocate_available_register(f);
293 int fbA_reg = spe_allocate_available_register(f);
294
295 int tmp_reg = spe_allocate_available_register(f);
296
297 /* These values might or might not eventually get put into
298 * registers. We avoid allocating them and setting them until
299 * they're actually needed; then we avoid setting them more than
300 * once, and release them at the end of code generation.
301 */
302 boolean one_reg_set = false;
303 int one_reg;
304 #define SET_ONE_REG_IF_UNSET(f) if (!one_reg_set) {\
305 one_reg = spe_allocate_available_register(f); \
306 spe_load_float(f, one_reg, 1.0f); \
307 one_reg_set = true; \
308 }
309 #define RELEASE_ONE_REG_IF_USED(f) if (one_reg_set) {\
310 spe_release_register(f, one_reg); \
311 }
312
313 boolean const_color_set = false;
314 int constR_reg, constG_reg, constB_reg;
315 #define SET_CONST_COLOR_IF_UNSET(f, blend_color) if (!const_color_set) {\
316 constR_reg = spe_allocate_available_register(f); \
317 constG_reg = spe_allocate_available_register(f); \
318 constG_reg = spe_allocate_available_register(f); \
319 spe_load_float(f, constR_reg, blend_color->color[0]); \
320 spe_load_float(f, constG_reg, blend_color->color[1]); \
321 spe_load_float(f, constB_reg, blend_color->color[2]); \
322 const_color_set = true;\
323 }
324 #define RELEASE_CONST_COLOR_IF_USED(f) if (const_color_set) {\
325 spe_release_register(f, constR_reg); \
326 spe_release_register(f, constG_reg); \
327 spe_release_register(f, constB_reg); \
328 }
329
330 boolean const_alpha_set = false;
331 int constA_reg;
332 #define SET_CONST_ALPHA_IF_UNSET(f, blend_color) if (!const_alpha_set) {\
333 constA_reg = spe_allocate_available_register(f); \
334 spe_load_float(f, constA_reg, blend_color->color[3]); \
335 const_alpha_set = true; \
336 }
337 #define RELEASE_CONST_ALPHA_IF_USED(f) if (const_alpha_set) {\
338 spe_release_register(f, constA_reg); \
339 }
340
341 /* Real code starts here */
342
343 ASSERT(blend->blend_enable);
344
345 /* Unpack/convert framebuffer colors from four 32-bit packed colors
346 * (fbRGBA) to four float RGBA vectors (fbR, fbG, fbB, fbA).
347 * Each 8-bit color component is expanded into a float in [0.0, 1.0].
348 */
349 {
350 int mask_reg = spe_allocate_available_register(f);
351
352 /* mask = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff} */
353 spe_load_int(f, mask_reg, 0xff);
354
355 /* XXX there may be more clever ways to implement the following code */
356 switch (color_format) {
357 case PIPE_FORMAT_A8R8G8B8_UNORM:
358 /* fbB = fbB & mask */
359 spe_and(f, fbB_reg, fbRGBA_reg, mask_reg);
360 /* mask = mask << 8 */
361 spe_roti(f, mask_reg, mask_reg, 8);
362
363 /* fbG = fbRGBA & mask */
364 spe_and(f, fbG_reg, fbRGBA_reg, mask_reg);
365 /* fbG = fbG >> 8 */
366 spe_roti(f, fbG_reg, fbG_reg, -8);
367 /* mask = mask << 8 */
368 spe_roti(f, mask_reg, mask_reg, 8);
369
370 /* fbR = fbRGBA & mask */
371 spe_and(f, fbR_reg, fbRGBA_reg, mask_reg);
372 /* fbR = fbR >> 16 */
373 spe_roti(f, fbR_reg, fbR_reg, -16);
374 /* mask = mask << 8 */
375 spe_roti(f, mask_reg, mask_reg, 8);
376
377 /* fbA = fbRGBA & mask */
378 spe_and(f, fbA_reg, fbRGBA_reg, mask_reg);
379 /* fbA = fbA >> 24 */
380 spe_roti(f, fbA_reg, fbA_reg, -24);
381 break;
382
383 case PIPE_FORMAT_B8G8R8A8_UNORM:
384 /* fbA = fbA & mask */
385 spe_and(f, fbA_reg, fbRGBA_reg, mask_reg);
386 /* mask = mask << 8 */
387 spe_roti(f, mask_reg, mask_reg, 8);
388
389 /* fbR = fbRGBA & mask */
390 spe_and(f, fbR_reg, fbRGBA_reg, mask_reg);
391 /* fbR = fbR >> 8 */
392 spe_roti(f, fbR_reg, fbR_reg, -8);
393 /* mask = mask << 8 */
394 spe_roti(f, mask_reg, mask_reg, 8);
395
396 /* fbG = fbRGBA & mask */
397 spe_and(f, fbG_reg, fbRGBA_reg, mask_reg);
398 /* fbG = fbG >> 16 */
399 spe_roti(f, fbG_reg, fbG_reg, -16);
400 /* mask = mask << 8 */
401 spe_roti(f, mask_reg, mask_reg, 8);
402
403 /* fbB = fbRGBA & mask */
404 spe_and(f, fbB_reg, fbRGBA_reg, mask_reg);
405 /* fbB = fbB >> 24 */
406 spe_roti(f, fbB_reg, fbB_reg, -24);
407 break;
408
409 default:
410 ASSERT(0);
411 }
412
413 /* convert int[4] in [0,255] to float[4] in [0.0, 1.0] */
414 spe_cuflt(f, fbR_reg, fbR_reg, 8);
415 spe_cuflt(f, fbG_reg, fbG_reg, 8);
416 spe_cuflt(f, fbB_reg, fbB_reg, 8);
417 spe_cuflt(f, fbA_reg, fbA_reg, 8);
418
419 spe_release_register(f, mask_reg);
420 }
421
422
423 /*
424 * Compute Src RGB terms. We're actually looking for the value
425 * of (the appropriate RGB factors) * (the incoming source RGB color).
426 */
427 switch (blend->rgb_src_factor) {
428 case PIPE_BLENDFACTOR_ONE:
429 /* factors = (1,1,1), so term = (R,G,B) */
430 spe_move(f, term1R_reg, fragR_reg);
431 spe_move(f, term1G_reg, fragG_reg);
432 spe_move(f, term1B_reg, fragB_reg);
433 break;
434 case PIPE_BLENDFACTOR_ZERO:
435 /* factors = (0,0,0), so term = (0,0,0) */
436 spe_load_float(f, term1R_reg, 0.0f);
437 spe_load_float(f, term1G_reg, 0.0f);
438 spe_load_float(f, term1B_reg, 0.0f);
439 break;
440 case PIPE_BLENDFACTOR_SRC_COLOR:
441 /* factors = (R,G,B), so term = (R*R, G*G, B*B) */
442 spe_fm(f, term1R_reg, fragR_reg, fragR_reg);
443 spe_fm(f, term1G_reg, fragG_reg, fragG_reg);
444 spe_fm(f, term1B_reg, fragB_reg, fragB_reg);
445 break;
446 case PIPE_BLENDFACTOR_SRC_ALPHA:
447 /* factors = (A,A,A), so term = (R*A, G*A, B*A) */
448 spe_fm(f, term1R_reg, fragR_reg, fragA_reg);
449 spe_fm(f, term1G_reg, fragG_reg, fragA_reg);
450 spe_fm(f, term1B_reg, fragB_reg, fragA_reg);
451 break;
452 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
453 /* factors = (1-R,1-G,1-B), so term = (R*(1-R), G*(1-G), B*(1-B)) */
454 /* we'll need the optional constant {1,1,1,1} register */
455 SET_ONE_REG_IF_UNSET(f)
456 /* tmp = 1 - R */
457 spe_fs(f, tmp_reg, one_reg, fragR_reg);
458 /* term = R * tmp */
459 spe_fm(f, term1R_reg, fragR_reg, tmp_reg);
460 /* repeat for G and B */
461 spe_fs(f, tmp_reg, one_reg, fragG_reg);
462 spe_fm(f, term1G_reg, fragG_reg, tmp_reg);
463 spe_fs(f, tmp_reg, one_reg, fragB_reg);
464 spe_fm(f, term1B_reg, fragB_reg, tmp_reg);
465 break;
466 case PIPE_BLENDFACTOR_DST_COLOR:
467 /* factors = (Rfb,Gfb,Bfb), so term = (R*Rfb, G*Gfb, B*Bfb) */
468 spe_fm(f, term1R_reg, fragR_reg, fbR_reg);
469 spe_fm(f, term1G_reg, fragG_reg, fbG_reg);
470 spe_fm(f, term1B_reg, fragB_reg, fbB_reg);
471 break;
472 case PIPE_BLENDFACTOR_INV_DST_COLOR:
473 /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (R*(1-Rfb),G*(1-Gfb),B*(1-Bfb)) */
474 /* we'll need the optional constant {1,1,1,1} register */
475 SET_ONE_REG_IF_UNSET(f)
476 /* tmp = 1 - Rfb */
477 spe_fs(f, tmp_reg, one_reg, fbR_reg);
478 /* term = R * tmp */
479 spe_fm(f, term1R_reg, fragR_reg, tmp_reg);
480 /* repeat for G and B */
481 spe_fs(f, tmp_reg, one_reg, fbG_reg);
482 spe_fm(f, term1G_reg, fragG_reg, tmp_reg);
483 spe_fs(f, tmp_reg, one_reg, fbB_reg);
484 spe_fm(f, term1B_reg, fragB_reg, tmp_reg);
485 break;
486 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
487 /* factors = (1-A,1-A,1-A), so term = (R*(1-A),G*(1-A),B*(1-A)) */
488 /* we'll need the optional constant {1,1,1,1} register */
489 SET_ONE_REG_IF_UNSET(f)
490 /* tmp = 1 - A */
491 spe_fs(f, tmp_reg, one_reg, fragA_reg);
492 /* term = R * tmp */
493 spe_fm(f, term1R_reg, fragR_reg, tmp_reg);
494 /* repeat for G and B with the same (1-A) factor */
495 spe_fm(f, term1G_reg, fragG_reg, tmp_reg);
496 spe_fm(f, term1B_reg, fragB_reg, tmp_reg);
497 break;
498 case PIPE_BLENDFACTOR_DST_ALPHA:
499 /* factors = (Afb, Afb, Afb), so term = (R*Afb, G*Afb, B*Afb) */
500 spe_fm(f, term1R_reg, fragR_reg, fbA_reg);
501 spe_fm(f, term1G_reg, fragG_reg, fbA_reg);
502 spe_fm(f, term1B_reg, fragB_reg, fbA_reg);
503 break;
504 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
505 /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (R*(1-Afb),G*(1-Afb),B*(1-Afb)) */
506 /* we'll need the optional constant {1,1,1,1} register */
507 SET_ONE_REG_IF_UNSET(f)
508 /* tmp = 1 - A */
509 spe_fs(f, tmp_reg, one_reg, fbA_reg);
510 /* term = R * tmp, G*tmp, and B*tmp */
511 spe_fm(f, term1R_reg, fragR_reg, tmp_reg);
512 spe_fm(f, term1G_reg, fragG_reg, tmp_reg);
513 spe_fm(f, term1B_reg, fragB_reg, tmp_reg);
514 break;
515 case PIPE_BLENDFACTOR_CONST_COLOR:
516 /* We'll need the optional blend color registers */
517 SET_CONST_COLOR_IF_UNSET(f,blend_color)
518 /* now, factor = (Rc,Gc,Bc), so term = (R*Rc,G*Gc,B*Bc) */
519 spe_fm(f, term1R_reg, fragR_reg, constR_reg);
520 spe_fm(f, term1G_reg, fragG_reg, constG_reg);
521 spe_fm(f, term1B_reg, fragB_reg, constB_reg);
522 break;
523 case PIPE_BLENDFACTOR_CONST_ALPHA:
524 /* we'll need the optional constant alpha register */
525 SET_CONST_ALPHA_IF_UNSET(f, blend_color)
526 /* factor = (Ac,Ac,Ac), so term = (R*Ac,G*Ac,B*Ac) */
527 spe_fm(f, term1R_reg, fragR_reg, constA_reg);
528 spe_fm(f, term1G_reg, fragG_reg, constA_reg);
529 spe_fm(f, term1B_reg, fragB_reg, constA_reg);
530 break;
531 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
532 /* We need both the optional {1,1,1,1} register, and the optional
533 * constant color registers
534 */
535 SET_ONE_REG_IF_UNSET(f)
536 SET_CONST_COLOR_IF_UNSET(f, blend_color)
537 /* factor = (1-Rc,1-Gc,1-Bc), so term = (R*(1-Rc),G*(1-Gc),B*(1-Bc)) */
538 spe_fs(f, tmp_reg, one_reg, constR_reg);
539 spe_fm(f, term1R_reg, fragR_reg, tmp_reg);
540 spe_fs(f, tmp_reg, one_reg, constG_reg);
541 spe_fm(f, term1G_reg, fragG_reg, tmp_reg);
542 spe_fs(f, tmp_reg, one_reg, constB_reg);
543 spe_fm(f, term1B_reg, fragB_reg, tmp_reg);
544 break;
545 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
546 /* We need the optional {1,1,1,1} register and the optional
547 * constant alpha register
548 */
549 SET_ONE_REG_IF_UNSET(f)
550 SET_CONST_ALPHA_IF_UNSET(f, blend_color)
551 /* factor = (1-Ac,1-Ac,1-Ac), so term = (R*(1-Ac),G*(1-Ac),B*(1-Ac)) */
552 spe_fs(f, tmp_reg, one_reg, constA_reg);
553 spe_fm(f, term1R_reg, fragR_reg, tmp_reg);
554 spe_fm(f, term1G_reg, fragG_reg, tmp_reg);
555 spe_fm(f, term1B_reg, fragB_reg, tmp_reg);
556 break;
557 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
558 /* We'll need the optional {1,1,1,1} register */
559 SET_ONE_REG_IF_UNSET(f)
560 /* factor = (min(A,1-Afb),min(A,1-Afb),min(A,1-Afb)), so
561 * term = (R*min(A,1-Afb), G*min(A,1-Afb), B*min(A,1-Afb))
562 */
563 /* tmp = 1 - Afb */
564 spe_fs(f, tmp_reg, one_reg, fbA_reg);
565 /* tmp = min(A,tmp) */
566 FLOAT_VECTOR_MIN(f, tmp_reg, fragA_reg, tmp_reg)
567 /* term = R*tmp */
568 spe_fm(f, term1R_reg, fragR_reg, tmp_reg);
569 spe_fm(f, term1G_reg, fragG_reg, tmp_reg);
570 spe_fm(f, term1B_reg, fragB_reg, tmp_reg);
571 break;
572
573 /* non-OpenGL cases? */
574 case PIPE_BLENDFACTOR_SRC1_COLOR:
575 case PIPE_BLENDFACTOR_SRC1_ALPHA:
576 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
577 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
578
579 default:
580 ASSERT(0);
581 }
582
583 /*
584 * Compute Src Alpha term
585 */
586 switch (blend->alpha_src_factor) {
587 case PIPE_BLENDFACTOR_ONE:
588 spe_move(f, term1A_reg, fragA_reg);
589 break;
590 case PIPE_BLENDFACTOR_SRC_COLOR:
591 spe_fm(f, term1A_reg, fragA_reg, fragA_reg);
592 break;
593 case PIPE_BLENDFACTOR_SRC_ALPHA:
594 spe_fm(f, term1A_reg, fragA_reg, fragA_reg);
595 break;
596 /* XXX more cases */
597 default:
598 ASSERT(0);
599 }
600
601 /*
602 * Compute Dest RGB terms
603 */
604 switch (blend->rgb_dst_factor) {
605 case PIPE_BLENDFACTOR_ONE:
606 spe_move(f, term2R_reg, fbR_reg);
607 spe_move(f, term2G_reg, fbG_reg);
608 spe_move(f, term2B_reg, fbB_reg);
609 break;
610 case PIPE_BLENDFACTOR_ZERO:
611 spe_zero(f, term2R_reg);
612 spe_zero(f, term2G_reg);
613 spe_zero(f, term2B_reg);
614 break;
615 case PIPE_BLENDFACTOR_SRC_COLOR:
616 spe_fm(f, term2R_reg, fbR_reg, fragR_reg);
617 spe_fm(f, term2G_reg, fbG_reg, fragG_reg);
618 spe_fm(f, term2B_reg, fbB_reg, fragB_reg);
619 break;
620 case PIPE_BLENDFACTOR_SRC_ALPHA:
621 spe_fm(f, term2R_reg, fbR_reg, fragA_reg);
622 spe_fm(f, term2G_reg, fbG_reg, fragA_reg);
623 spe_fm(f, term2B_reg, fbB_reg, fragA_reg);
624 break;
625 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
626 #if 0
627 /* one = {1.0, 1.0, 1.0, 1.0} */
628 if (!one_reg_set) {
629 one_reg = spe_allocate_available_register(f);
630 spe_load_float(f, one_reg, 1.0f);
631 one_reg_set = true;
632 }
633 /* tmp = one - fragA */
634 spe_fs(f, tmp_reg, one_reg, fragA_reg);
635 /* term = fb * tmp */
636 spe_fm(f, term2R_reg, fbR_reg, tmp_reg);
637 spe_fm(f, term2G_reg, fbG_reg, tmp_reg);
638 spe_fm(f, term2B_reg, fbB_reg, tmp_reg);
639 #else
640 /* Compute: term2x = fbx * (1.0 - fragA)
641 * Which is: term2x = fbx - fbx * fragA
642 * Use fnms t,a,b,c which computes t=c-a*b
643 */
644 spe_fnms(f, term2R_reg, fbR_reg, fragA_reg, fbR_reg);
645 spe_fnms(f, term2G_reg, fbG_reg, fragA_reg, fbG_reg);
646 spe_fnms(f, term2B_reg, fbB_reg, fragA_reg, fbB_reg);
647 #endif
648 break;
649 /* XXX more cases */
650 // GL_ONE_MINUS_SRC_COLOR
651 // GL_DST_COLOR
652 // GL_ONE_MINUS_DST_COLOR
653 // GL_DST_ALPHA
654 // GL_CONSTANT_COLOR
655 // GL_ONE_MINUS_CONSTANT_COLOR
656 // GL_CONSTANT_ALPHA
657 // GL_ONE_MINUS_CONSTANT_ALPHA
658 default:
659 ASSERT(0);
660 }
661
662 /*
663 * Compute Dest Alpha term
664 */
665 switch (blend->alpha_dst_factor) {
666 case PIPE_BLENDFACTOR_ONE:
667 spe_move(f, term2A_reg, fbA_reg);
668 break;
669 case PIPE_BLENDFACTOR_ZERO:
670 spe_zero(f, term2A_reg);
671 break;
672 case PIPE_BLENDFACTOR_SRC_ALPHA:
673 spe_fm(f, term2A_reg, fbA_reg, fragA_reg);
674 break;
675 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
676 #if 0
677 /* one = {1.0, 1.0, 1.0, 1.0} */
678 if (!one_reg_set) {
679 one_reg = spe_allocate_available_register(f);
680 spe_load_float(f, one_reg, 1.0f);
681 one_reg_set = true;
682 }
683 /* tmp = one - fragA */
684 spe_fs(f, tmp_reg, one_reg, fragA_reg);
685 /* termA = fbA * tmp */
686 spe_fm(f, term2A_reg, fbA_reg, tmp_reg);
687 #else
688 /* Compute: term2A = fbA * (1.0 - fragA)
689 * Which is: term2A = fbA - fbA * fragA
690 * Use fnms t,a,b,c which computes t=c-a*b
691 */
692 spe_fnms(f, term2A_reg, fbA_reg, fragA_reg, fbA_reg);
693 #endif
694 break;
695 /* XXX more cases */
696 // GL_ONE_MINUS_SRC_COLOR
697 // GL_DST_COLOR
698 // GL_ONE_MINUS_DST_COLOR
699 // GL_DST_ALPHA
700 // GL_CONSTANT_COLOR
701 // GL_ONE_MINUS_CONSTANT_COLOR
702 // GL_CONSTANT_ALPHA
703 // GL_ONE_MINUS_CONSTANT_ALPHA
704 default:
705 ASSERT(0);
706 }
707
708 /*
709 * Combine Src/Dest RGB terms
710 */
711 switch (blend->rgb_func) {
712 case PIPE_BLEND_ADD:
713 spe_fa(f, fragR_reg, term1R_reg, term2R_reg);
714 spe_fa(f, fragG_reg, term1G_reg, term2G_reg);
715 spe_fa(f, fragB_reg, term1B_reg, term2B_reg);
716 break;
717 case PIPE_BLEND_SUBTRACT:
718 spe_fs(f, fragR_reg, term1R_reg, term2R_reg);
719 spe_fs(f, fragG_reg, term1G_reg, term2G_reg);
720 spe_fs(f, fragB_reg, term1B_reg, term2B_reg);
721 break;
722 case PIPE_BLEND_REVERSE_SUBTRACT:
723 spe_fs(f, fragR_reg, term2R_reg, term1R_reg);
724 spe_fs(f, fragG_reg, term2G_reg, term1G_reg);
725 spe_fs(f, fragB_reg, term2B_reg, term1B_reg);
726 break;
727 case PIPE_BLEND_MIN:
728 FLOAT_VECTOR_MIN(f, fragR_reg, term1R_reg, term2R_reg)
729 FLOAT_VECTOR_MIN(f, fragG_reg, term1G_reg, term2G_reg)
730 FLOAT_VECTOR_MIN(f, fragB_reg, term1B_reg, term2B_reg)
731 break;
732 case PIPE_BLEND_MAX:
733 FLOAT_VECTOR_MAX(f, fragR_reg, term1R_reg, term2R_reg)
734 FLOAT_VECTOR_MAX(f, fragG_reg, term1G_reg, term2G_reg)
735 FLOAT_VECTOR_MAX(f, fragB_reg, term1B_reg, term2B_reg)
736 break;
737 default:
738 ASSERT(0);
739 }
740
741 /*
742 * Combine Src/Dest A term
743 */
744 switch (blend->alpha_func) {
745 case PIPE_BLEND_ADD:
746 spe_fa(f, fragA_reg, term1A_reg, term2A_reg);
747 break;
748 case PIPE_BLEND_SUBTRACT:
749 spe_fs(f, fragA_reg, term1A_reg, term2A_reg);
750 break;
751 case PIPE_BLEND_REVERSE_SUBTRACT:
752 spe_fs(f, fragA_reg, term2A_reg, term1A_reg);
753 break;
754 case PIPE_BLEND_MIN:
755 FLOAT_VECTOR_MIN(f, fragA_reg, term1A_reg, term2A_reg)
756 break;
757 case PIPE_BLEND_MAX:
758 FLOAT_VECTOR_MAX(f, fragA_reg, term1A_reg, term2A_reg)
759 break;
760 default:
761 ASSERT(0);
762 }
763
764 spe_release_register(f, term1R_reg);
765 spe_release_register(f, term1G_reg);
766 spe_release_register(f, term1B_reg);
767 spe_release_register(f, term1A_reg);
768
769 spe_release_register(f, term2R_reg);
770 spe_release_register(f, term2G_reg);
771 spe_release_register(f, term2B_reg);
772 spe_release_register(f, term2A_reg);
773
774 spe_release_register(f, fbR_reg);
775 spe_release_register(f, fbG_reg);
776 spe_release_register(f, fbB_reg);
777 spe_release_register(f, fbA_reg);
778
779 spe_release_register(f, tmp_reg);
780
781 /* Free any optional registers that actually got used */
782 RELEASE_ONE_REG_IF_USED(f)
783 RELEASE_CONST_COLOR_IF_USED(f)
784 RELEASE_CONST_ALPHA_IF_USED(f)
785 }
786
787
/**
 * Placeholder for logic-op code generation.  Currently emits nothing.
 *
 * XXX to-do: the eventual implementation is meant to operate on 32-bit
 * packed pixels, not on float colors.
 */
static void
gen_logicop(const struct pipe_blend_state *blend,
            struct spe_function *f,
            int fragRGBA_reg, int fbRGBA_reg)
{
   /* not implemented yet; parameters are intentionally unused */
   (void) blend;
   (void) f;
   (void) fragRGBA_reg;
   (void) fbRGBA_reg;
}
796
797
798 static void
799 gen_colormask(uint colormask,
800 struct spe_function *f,
801 int fragRGBA_reg, int fbRGBA_reg)
802 {
803 /* XXX to-do */
804 /* operate on 32-bit packed pixels, not float colors */
805 }
806
807
808
809 /**
810 * Generate code to pack a quad of float colors into a four 32-bit integers.
811 *
812 * \param f SPE function to append instruction onto.
813 * \param color_format the dest color packing format
814 * \param r_reg register containing four red values (in/clobbered)
815 * \param g_reg register containing four green values (in/clobbered)
816 * \param b_reg register containing four blue values (in/clobbered)
817 * \param a_reg register containing four alpha values (in/clobbered)
818 * \param rgba_reg register to store the packed RGBA colors (out)
819 */
820 static void
821 gen_pack_colors(struct spe_function *f,
822 enum pipe_format color_format,
823 int r_reg, int g_reg, int b_reg, int a_reg,
824 int rgba_reg)
825 {
826 /* Convert float[4] in [0.0,1.0] to int[4] in [0,~0], with clamping */
827 spe_cfltu(f, r_reg, r_reg, 32);
828 spe_cfltu(f, g_reg, g_reg, 32);
829 spe_cfltu(f, b_reg, b_reg, 32);
830 spe_cfltu(f, a_reg, a_reg, 32);
831
832 /* Shift the most significant bytes to least the significant positions.
833 * I.e.: reg = reg >> 24
834 */
835 spe_rotmi(f, r_reg, r_reg, -24);
836 spe_rotmi(f, g_reg, g_reg, -24);
837 spe_rotmi(f, b_reg, b_reg, -24);
838 spe_rotmi(f, a_reg, a_reg, -24);
839
840 /* Shift the color bytes according to the surface format */
841 if (color_format == PIPE_FORMAT_A8R8G8B8_UNORM) {
842 spe_roti(f, g_reg, g_reg, 8); /* green <<= 8 */
843 spe_roti(f, r_reg, r_reg, 16); /* red <<= 16 */
844 spe_roti(f, a_reg, a_reg, 24); /* alpha <<= 24 */
845 }
846 else if (color_format == PIPE_FORMAT_B8G8R8A8_UNORM) {
847 spe_roti(f, r_reg, r_reg, 8); /* red <<= 8 */
848 spe_roti(f, g_reg, g_reg, 16); /* green <<= 16 */
849 spe_roti(f, b_reg, b_reg, 24); /* blue <<= 24 */
850 }
851 else {
852 ASSERT(0);
853 }
854
855 /* Merge red, green, blue, alpha registers to make packed RGBA colors.
856 * Eg: after shifting according to color_format we might have:
857 * R = {0x00ff0000, 0x00110000, 0x00220000, 0x00330000}
858 * G = {0x0000ff00, 0x00004400, 0x00005500, 0x00006600}
859 * B = {0x000000ff, 0x00000077, 0x00000088, 0x00000099}
860 * A = {0xff000000, 0xaa000000, 0xbb000000, 0xcc000000}
861 * OR-ing all those together gives us four packed colors:
862 * RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699}
863 */
864 spe_or(f, rgba_reg, r_reg, g_reg);
865 spe_or(f, rgba_reg, rgba_reg, b_reg);
866 spe_or(f, rgba_reg, rgba_reg, a_reg);
867 }
868
869
870
871
872 /**
873 * Generate SPE code to implement the fragment operations (alpha test,
874 * depth test, stencil test, blending, colormask, and final
875 * framebuffer write) as specified by the current context state.
876 *
877 * Logically, this code will be called after running the fragment
878 * shader. But under some circumstances we could run some of this
879 * code before the fragment shader to cull fragments/quads that are
880 * totally occluded/discarded.
881 *
882 * XXX we only support PIPE_FORMAT_Z24S8_UNORM z/stencil buffer right now.
883 *
884 * See the spu_default_fragment_ops() function to see how the per-fragment
885 * operations would be done with ordinary C code.
886 * The code we generate here though has no branches, is SIMD, etc and
887 * should be much faster.
888 *
889 * \param cell the rendering context (in)
890 * \param f the generated function (out)
891 */
892 void
893 cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
894 {
895 const struct pipe_depth_stencil_alpha_state *dsa =
896 &cell->depth_stencil->base;
897 const struct pipe_blend_state *blend = &cell->blend->base;
898 const struct pipe_blend_color *blend_color = &cell->blend_color;
899 const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format;
900
901 /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
902 const int x_reg = 3; /* uint */
903 const int y_reg = 4; /* uint */
904 const int color_tile_reg = 5; /* tile_t * */
905 const int depth_tile_reg = 6; /* tile_t * */
906 const int fragZ_reg = 7; /* vector float */
907 const int fragR_reg = 8; /* vector float */
908 const int fragG_reg = 9; /* vector float */
909 const int fragB_reg = 10; /* vector float */
910 const int fragA_reg = 11; /* vector float */
911 const int mask_reg = 12; /* vector uint */
912
913 /* offset of quad from start of tile
914 * XXX assuming 4-byte pixels for color AND Z/stencil!!!!
915 */
916 int quad_offset_reg;
917
918 int fbRGBA_reg; /**< framebuffer's RGBA colors for quad */
919 int fbZS_reg; /**< framebuffer's combined z/stencil values for quad */
920
921 spe_init_func(f, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
922
923 if (cell->debug_flags & CELL_DEBUG_ASM) {
924 spe_print_code(f, true);
925 spe_indent(f, 8);
926 spe_comment(f, -4, "Begin per-fragment ops");
927 }
928
929 spe_allocate_register(f, x_reg);
930 spe_allocate_register(f, y_reg);
931 spe_allocate_register(f, color_tile_reg);
932 spe_allocate_register(f, depth_tile_reg);
933 spe_allocate_register(f, fragZ_reg);
934 spe_allocate_register(f, fragR_reg);
935 spe_allocate_register(f, fragG_reg);
936 spe_allocate_register(f, fragB_reg);
937 spe_allocate_register(f, fragA_reg);
938 spe_allocate_register(f, mask_reg);
939
940 quad_offset_reg = spe_allocate_available_register(f);
941 fbRGBA_reg = spe_allocate_available_register(f);
942 fbZS_reg = spe_allocate_available_register(f);
943
944 /* compute offset of quad from start of tile, in bytes */
945 {
946 int x2_reg = spe_allocate_available_register(f);
947 int y2_reg = spe_allocate_available_register(f);
948
949 ASSERT(TILE_SIZE == 32);
950
951 spe_rotmi(f, x2_reg, x_reg, -1); /* x2 = x / 2 */
952 spe_rotmi(f, y2_reg, y_reg, -1); /* y2 = y / 2 */
953 spe_shli(f, y2_reg, y2_reg, 4); /* y2 *= 16 */
954 spe_a(f, quad_offset_reg, y2_reg, x2_reg); /* offset = y2 + x2 */
955 spe_shli(f, quad_offset_reg, quad_offset_reg, 4); /* offset *= 16 */
956
957 spe_release_register(f, x2_reg);
958 spe_release_register(f, y2_reg);
959 }
960
961
962 if (dsa->alpha.enabled) {
963 gen_alpha_test(dsa, f, mask_reg, fragA_reg);
964 }
965
966 if (dsa->depth.enabled || dsa->stencil[0].enabled) {
967 const enum pipe_format zs_format = cell->framebuffer.zsbuf->format;
968 boolean write_depth_stencil;
969
970 int fbZ_reg = spe_allocate_available_register(f); /* Z values */
971 int fbS_reg = spe_allocate_available_register(f); /* Stencil values */
972
973 /* fetch quad of depth/stencil values from tile at (x,y) */
974 /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */
975 spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
976
977 if (dsa->depth.enabled) {
978 /* Extract Z bits from fbZS_reg into fbZ_reg */
979 if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
980 zs_format == PIPE_FORMAT_X8Z24_UNORM) {
981 int mask_reg = spe_allocate_available_register(f);
982 spe_fsmbi(f, mask_reg, 0x7777); /* mask[0,1,2,3] = 0x00ffffff */
983 spe_and(f, fbZ_reg, fbZS_reg, mask_reg); /* fbZ = fbZS & mask */
984 spe_release_register(f, mask_reg);
985 /* OK, fbZ_reg has four 24-bit Z values now */
986 }
987 else {
988 /* XXX handle other z/stencil formats */
989 ASSERT(0);
990 }
991
992 /* Convert fragZ values from float[4] to uint[4] */
993 if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
994 zs_format == PIPE_FORMAT_X8Z24_UNORM ||
995 zs_format == PIPE_FORMAT_Z24S8_UNORM ||
996 zs_format == PIPE_FORMAT_Z24X8_UNORM) {
997 /* 24-bit Z values */
998 int scale_reg = spe_allocate_available_register(f);
999
1000 /* scale_reg[0,1,2,3] = float(2^24-1) */
1001 spe_load_float(f, scale_reg, (float) 0xffffff);
1002
1003 /* XXX these two instructions might be combined */
1004 spe_fm(f, fragZ_reg, fragZ_reg, scale_reg); /* fragZ *= scale */
1005 spe_cfltu(f, fragZ_reg, fragZ_reg, 0); /* fragZ = (int) fragZ */
1006
1007 spe_release_register(f, scale_reg);
1008 }
1009 else {
1010 /* XXX handle 16-bit Z format */
1011 ASSERT(0);
1012 }
1013 }
1014
1015 if (dsa->stencil[0].enabled) {
1016 /* Extract Stencil bit sfrom fbZS_reg into fbS_reg */
1017 if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
1018 zs_format == PIPE_FORMAT_X8Z24_UNORM) {
1019 /* XXX extract with a shift */
1020 ASSERT(0);
1021 }
1022 else if (zs_format == PIPE_FORMAT_Z24S8_UNORM ||
1023 zs_format == PIPE_FORMAT_Z24X8_UNORM) {
1024 /* XXX extract with a mask */
1025 ASSERT(0);
1026 }
1027 }
1028
1029
1030 if (dsa->stencil[0].enabled) {
1031 /* XXX this may involve depth testing too */
1032 // gen_stencil_test(dsa, f, ... );
1033 ASSERT(0);
1034 }
1035 else if (dsa->depth.enabled) {
1036 int zmask_reg = spe_allocate_available_register(f);
1037 gen_depth_test(dsa, f, mask_reg, fragZ_reg, fbZ_reg, zmask_reg);
1038 spe_release_register(f, zmask_reg);
1039 }
1040
1041 /* do we need to write Z and/or Stencil back into framebuffer? */
1042 write_depth_stencil = (dsa->depth.writemask |
1043 dsa->stencil[0].write_mask |
1044 dsa->stencil[1].write_mask);
1045
1046 if (write_depth_stencil) {
1047 /* Merge latest Z and Stencil values into fbZS_reg.
1048 * fbZ_reg has four Z vals in bits [23..0] or bits [15..0].
1049 * fbS_reg has four 8-bit Z values in bits [7..0].
1050 */
1051 if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
1052 zs_format == PIPE_FORMAT_X8Z24_UNORM) {
1053 spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */
1054 spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */
1055 }
1056 else if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
1057 zs_format == PIPE_FORMAT_X8Z24_UNORM) {
1058 /* XXX to do */
1059 ASSERT(0);
1060 }
1061 else if (zs_format == PIPE_FORMAT_Z16_UNORM) {
1062 /* XXX to do */
1063 ASSERT(0);
1064 }
1065 else if (zs_format == PIPE_FORMAT_S8_UNORM) {
1066 /* XXX to do */
1067 ASSERT(0);
1068 }
1069 else {
1070 /* bad zs_format */
1071 ASSERT(0);
1072 }
1073
1074 /* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */
1075 spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
1076 }
1077
1078 spe_release_register(f, fbZ_reg);
1079 spe_release_register(f, fbS_reg);
1080 }
1081
1082
1083 /* Get framebuffer quad/colors. We'll need these for blending,
1084 * color masking, and to obey the quad/pixel mask.
1085 * Load: fbRGBA_reg = memory[color_tile + quad_offset]
1086 * Note: if mask={~0,~0,~0,~0} and we're not blending or colormasking
1087 * we could skip this load.
1088 */
1089 spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg);
1090
1091
1092 if (blend->blend_enable) {
1093 gen_blend(blend, blend_color, f, color_format,
1094 fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg);
1095 }
1096
1097 /*
1098 * Write fragment colors to framebuffer/tile.
1099 * This involves converting the fragment colors from float[4] to the
1100 * tile's specific format and obeying the quad/pixel mask.
1101 */
1102 {
1103 int rgba_reg = spe_allocate_available_register(f);
1104
1105 /* Pack four float colors as four 32-bit int colors */
1106 gen_pack_colors(f, color_format,
1107 fragR_reg, fragG_reg, fragB_reg, fragA_reg,
1108 rgba_reg);
1109
1110 if (blend->logicop_enable) {
1111 gen_logicop(blend, f, rgba_reg, fbRGBA_reg);
1112 }
1113
1114 if (blend->colormask != 0xf) {
1115 gen_colormask(blend->colormask, f, rgba_reg, fbRGBA_reg);
1116 }
1117
1118
1119 /* Mix fragment colors with framebuffer colors using the quad/pixel mask:
1120 * if (mask[i])
1121 * rgba[i] = rgba[i];
1122 * else
1123 * rgba[i] = framebuffer[i];
1124 */
1125 spe_selb(f, rgba_reg, fbRGBA_reg, rgba_reg, mask_reg);
1126
1127 /* Store updated quad in tile:
1128 * memory[color_tile + quad_offset] = rgba_reg;
1129 */
1130 spe_stqx(f, rgba_reg, color_tile_reg, quad_offset_reg);
1131
1132 spe_release_register(f, rgba_reg);
1133 }
1134
1135 //printf("gen_fragment_ops nr instructions: %u\n", f->num_inst);
1136
1137 spe_bi(f, SPE_REG_RA, 0, 0); /* return from function call */
1138
1139
1140 spe_release_register(f, fbRGBA_reg);
1141 spe_release_register(f, fbZS_reg);
1142 spe_release_register(f, quad_offset_reg);
1143
1144 if (cell->debug_flags & CELL_DEBUG_ASM) {
1145 spe_comment(f, -4, "End per-fragment ops");
1146 }
1147 }