Merge remote branch 'upstream/gallium-0.1' into nouveau-gallium-0.1
[mesa.git] / src / gallium / drivers / cell / ppu / cell_state_per_fragment.c
1 /*
2 * (C) Copyright IBM Corporation 2008
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file
27 * Generate code to perform all per-fragment operations.
28 *
29 * Code generated by these functions performs alpha, depth, and stencil
30 * testing as well as alpha blending.
31 *
32 * \note
33 * Occlusion query is not supported, but this is the right place to add that
34 * support.
35 *
36 * \author Ian Romanick <idr@us.ibm.com>
37 */
38
39 #include "pipe/p_defines.h"
40 #include "pipe/p_state.h"
41
42 #include "cell_context.h"
43
44 #include "rtasm/rtasm_ppc_spe.h"
45
46
47 /**
48 * Generate code to perform alpha testing.
49 *
50 * The code generated by this function uses the register specificed by
51 * \c mask as both an input and an output.
52 *
53 * \param dsa Current alpha-test state
54 * \param f Function to which code should be appended
55 * \param mask Index of register containing active fragment mask
56 * \param alphas Index of register containing per-fragment alpha values
57 *
58 * \note Emits a maximum of 6 instructions.
59 */
60 static void
61 emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa,
62 struct spe_function *f, int mask, int alphas)
63 {
64 /* If the alpha function is either NEVER or ALWAYS, there is no need to
65 * load the reference value into a register. ALWAYS is a fairly common
66 * case, and this optimization saves 2 instructions.
67 */
68 if (dsa->alpha.enabled
69 && (dsa->alpha.func != PIPE_FUNC_NEVER)
70 && (dsa->alpha.func != PIPE_FUNC_ALWAYS)) {
71 int ref = spe_allocate_available_register(f);
72 int tmp_a = spe_allocate_available_register(f);
73 int tmp_b = spe_allocate_available_register(f);
74 union {
75 float f;
76 unsigned u;
77 } ref_val;
78 boolean complement = FALSE;
79
80 ref_val.f = dsa->alpha.ref;
81
82 spe_il(f, ref, ref_val.u & 0x0000ffff);
83 spe_ilh(f, ref, ref_val.u >> 16);
84
85 switch (dsa->alpha.func) {
86 case PIPE_FUNC_NOTEQUAL:
87 complement = TRUE;
88 /* FALLTHROUGH */
89
90 case PIPE_FUNC_EQUAL:
91 spe_fceq(f, tmp_a, ref, alphas);
92 break;
93
94 case PIPE_FUNC_LEQUAL:
95 complement = TRUE;
96 /* FALLTHROUGH */
97
98 case PIPE_FUNC_GREATER:
99 spe_fcgt(f, tmp_a, ref, alphas);
100 break;
101
102 case PIPE_FUNC_LESS:
103 complement = TRUE;
104 /* FALLTHROUGH */
105
106 case PIPE_FUNC_GEQUAL:
107 spe_fcgt(f, tmp_a, ref, alphas);
108 spe_fceq(f, tmp_b, ref, alphas);
109 spe_or(f, tmp_a, tmp_b, tmp_a);
110 break;
111
112 case PIPE_FUNC_ALWAYS:
113 case PIPE_FUNC_NEVER:
114 default:
115 assert(0);
116 break;
117 }
118
119 if (complement) {
120 spe_andc(f, mask, mask, tmp_a);
121 } else {
122 spe_and(f, mask, mask, tmp_a);
123 }
124
125 spe_release_register(f, ref);
126 spe_release_register(f, tmp_a);
127 spe_release_register(f, tmp_b);
128 } else if (dsa->alpha.enabled && (dsa->alpha.func == PIPE_FUNC_NEVER)) {
129 spe_il(f, mask, 0);
130 }
131 }
132
133
134 /**
135 * \param dsa Current depth-test state
136 * \param f Function to which code should be appended
137 * \param m Mask of allocated / free SPE registers
138 * \param mask Index of register to contain depth-pass mask
139 * \param stored Index of register containing values from depth buffer
140 * \param calculated Index of register containing per-fragment depth values
141 *
142 * \return
143 * If the calculated depth comparison mask is the actual mask, \c FALSE is
144 * returned. If the calculated depth comparison mask is the compliment of
145 * the actual mask, \c TRUE is returned.
146 *
147 * \note Emits a maximum of 3 instructions.
148 */
149 static boolean
150 emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa,
151 struct spe_function *f, int mask, int stored, int calculated)
152 {
153 unsigned func = (dsa->depth.enabled)
154 ? dsa->depth.func : PIPE_FUNC_ALWAYS;
155 int tmp = spe_allocate_available_register(f);
156 boolean compliment = FALSE;
157
158 switch (func) {
159 case PIPE_FUNC_NEVER:
160 spe_il(f, mask, 0);
161 break;
162
163 case PIPE_FUNC_NOTEQUAL:
164 compliment = TRUE;
165 /* FALLTHROUGH */
166 case PIPE_FUNC_EQUAL:
167 spe_ceq(f, mask, calculated, stored);
168 break;
169
170 case PIPE_FUNC_LEQUAL:
171 compliment = TRUE;
172 /* FALLTHROUGH */
173 case PIPE_FUNC_GREATER:
174 spe_clgt(f, mask, calculated, stored);
175 break;
176
177 case PIPE_FUNC_LESS:
178 compliment = TRUE;
179 /* FALLTHROUGH */
180 case PIPE_FUNC_GEQUAL:
181 spe_clgt(f, mask, calculated, stored);
182 spe_ceq(f, tmp, calculated, stored);
183 spe_or(f, mask, mask, tmp);
184 break;
185
186 case PIPE_FUNC_ALWAYS:
187 spe_il(f, mask, ~0);
188 break;
189
190 default:
191 assert(0);
192 break;
193 }
194
195 spe_release_register(f, tmp);
196 return compliment;
197 }
198
199
200 /**
201 * \note Emits a maximum of 5 instructions.
202 *
203 * \warning
204 * Since \c out and \c in might be the same register, this routine cannot
205 * generate code that uses \c out as a temporary.
206 */
207 static void
208 emit_stencil_op(struct spe_function *f,
209 int out, int in, int mask, unsigned op, unsigned ref)
210 {
211 const int clamp = spe_allocate_available_register(f);
212 const int clamp_mask = spe_allocate_available_register(f);
213 const int result = spe_allocate_available_register(f);
214
215 switch(op) {
216 case PIPE_STENCIL_OP_KEEP:
217 assert(0);
218 case PIPE_STENCIL_OP_ZERO:
219 spe_il(f, result, 0);
220 break;
221 case PIPE_STENCIL_OP_REPLACE:
222 spe_il(f, result, ref);
223 break;
224 case PIPE_STENCIL_OP_INCR:
225 spe_il(f, clamp, 0x0ff);
226 spe_ai(f, result, in, 1);
227 spe_clgti(f, clamp_mask, result, 0x0ff);
228 spe_selb(f, result, result, clamp, clamp_mask);
229 break;
230 case PIPE_STENCIL_OP_DECR:
231 spe_il(f, clamp, 0);
232 spe_ai(f, result, in, -1);
233
234 /* If "(s-1) < 0" in signed arithemtic, then "(s-1) > MAX" in unsigned
235 * arithmetic.
236 */
237 spe_clgti(f, clamp_mask, result, 0x0ff);
238 spe_selb(f, result, result, clamp, clamp_mask);
239 break;
240 case PIPE_STENCIL_OP_INCR_WRAP:
241 spe_ai(f, result, in, 1);
242 break;
243 case PIPE_STENCIL_OP_DECR_WRAP:
244 spe_ai(f, result, in, -1);
245 break;
246 case PIPE_STENCIL_OP_INVERT:
247 spe_nor(f, result, in, in);
248 break;
249 default:
250 assert(0);
251 }
252
253 spe_selb(f, out, in, result, mask);
254
255 spe_release_register(f, result);
256 spe_release_register(f, clamp_mask);
257 spe_release_register(f, clamp);
258 }
259
260
261 /**
262 * \param dsa Depth / stencil test state
263 * \param face 0 for front face, 1 for back face
264 * \param f Function to append instructions to
265 * \param reg_mask Mask of allocated registers
266 * \param mask Register containing mask of fragments passing the
267 * alpha test
268 * \param depth_mask Register containing mask of fragments passing the
269 * depth test
270 * \param depth_compliment Is \c depth_mask the compliment of the actual mask?
271 * \param stencil Register containing values from stencil buffer
272 * \param depth_pass Register to store mask of fragments passing stencil test
273 * and depth test
274 *
275 * \note
276 * Emits a maximum of 10 + (3 * 5) = 25 instructions.
277 */
278 static int
279 emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa,
280 unsigned face,
281 struct spe_function *f,
282 int mask,
283 int depth_mask,
284 boolean depth_complement,
285 int stencil,
286 int depth_pass)
287 {
288 int stencil_fail = spe_allocate_available_register(f);
289 int depth_fail = spe_allocate_available_register(f);
290 int stencil_mask = spe_allocate_available_register(f);
291 int stencil_pass = spe_allocate_available_register(f);
292 int face_stencil = spe_allocate_available_register(f);
293 int stencil_src = stencil;
294 const unsigned ref = (dsa->stencil[face].ref_value
295 & dsa->stencil[face].value_mask);
296 boolean complement = FALSE;
297 int stored;
298 int tmp = spe_allocate_available_register(f);
299
300
301 if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
302 && (dsa->stencil[face].func != PIPE_FUNC_ALWAYS)
303 && (dsa->stencil[face].value_mask != 0x0ff)) {
304 stored = spe_allocate_available_register(f);
305 spe_andi(f, stored, stencil, dsa->stencil[face].value_mask);
306 } else {
307 stored = stencil;
308 }
309
310
311 switch (dsa->stencil[face].func) {
312 case PIPE_FUNC_NEVER:
313 spe_il(f, stencil_mask, 0);
314 break;
315
316 case PIPE_FUNC_NOTEQUAL:
317 complement = TRUE;
318 /* FALLTHROUGH */
319 case PIPE_FUNC_EQUAL:
320 spe_ceqi(f, stencil_mask, stored, ref);
321 break;
322
323 case PIPE_FUNC_LEQUAL:
324 complement = TRUE;
325 /* FALLTHROUGH */
326 case PIPE_FUNC_GREATER:
327 spe_clgti(f, stencil_mask, stored, ref);
328 break;
329
330 case PIPE_FUNC_LESS:
331 complement = TRUE;
332 /* FALLTHROUGH */
333 case PIPE_FUNC_GEQUAL:
334 spe_clgti(f, stencil_mask, stored, ref);
335 spe_ceqi(f, tmp, stored, ref);
336 spe_or(f, stencil_mask, stencil_mask, tmp);
337 break;
338
339 case PIPE_FUNC_ALWAYS:
340 /* See comment below. */
341 break;
342
343 default:
344 assert(0);
345 break;
346 }
347
348 if (stored != stencil) {
349 spe_release_register(f, stored);
350 }
351 spe_release_register(f, tmp);
352
353
354 /* ALWAYS is a very common stencil-test, so some effort is applied to
355 * optimize that case. The stencil-pass mask is the same as the input
356 * fragment mask. This makes the stencil-test (above) a no-op, and the
357 * input fragment mask can be "renamed" the stencil-pass mask.
358 */
359 if (dsa->stencil[face].func == PIPE_FUNC_ALWAYS) {
360 spe_release_register(f, stencil_pass);
361 stencil_pass = mask;
362 } else {
363 if (complement) {
364 spe_andc(f, stencil_pass, mask, stencil_mask);
365 } else {
366 spe_and(f, stencil_pass, mask, stencil_mask);
367 }
368 }
369
370 if (depth_complement) {
371 spe_andc(f, depth_pass, stencil_pass, depth_mask);
372 } else {
373 spe_and(f, depth_pass, stencil_pass, depth_mask);
374 }
375
376
377 /* Conditionally emit code to update the stencil value under various
378 * condititons. Note that there is no need to generate code under the
379 * following circumstances:
380 *
381 * - Stencil write mask is zero.
382 * - For stencil-fail if the stencil test is ALWAYS
383 * - For depth-fail if the stencil test is NEVER
384 * - For depth-pass if the stencil test is NEVER
385 * - Any of the 3 conditions if the operation is KEEP
386 */
387 if (dsa->stencil[face].write_mask != 0) {
388 if ((dsa->stencil[face].func != PIPE_FUNC_ALWAYS)
389 && (dsa->stencil[face].fail_op != PIPE_STENCIL_OP_KEEP)) {
390 if (complement) {
391 spe_and(f, stencil_fail, mask, stencil_mask);
392 } else {
393 spe_andc(f, stencil_fail, mask, stencil_mask);
394 }
395
396 emit_stencil_op(f, face_stencil, stencil_src, stencil_fail,
397 dsa->stencil[face].fail_op,
398 dsa->stencil[face].ref_value);
399
400 stencil_src = face_stencil;
401 }
402
403 if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
404 && (dsa->stencil[face].zfail_op != PIPE_STENCIL_OP_KEEP)) {
405 if (depth_complement) {
406 spe_and(f, depth_fail, stencil_pass, depth_mask);
407 } else {
408 spe_andc(f, depth_fail, stencil_pass, depth_mask);
409 }
410
411 emit_stencil_op(f, face_stencil, stencil_src, depth_fail,
412 dsa->stencil[face].zfail_op,
413 dsa->stencil[face].ref_value);
414 stencil_src = face_stencil;
415 }
416
417 if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
418 && (dsa->stencil[face].zpass_op != PIPE_STENCIL_OP_KEEP)) {
419 emit_stencil_op(f, face_stencil, stencil_src, depth_pass,
420 dsa->stencil[face].zpass_op,
421 dsa->stencil[face].ref_value);
422 stencil_src = face_stencil;
423 }
424 }
425
426 spe_release_register(f, stencil_fail);
427 spe_release_register(f, depth_fail);
428 spe_release_register(f, stencil_mask);
429 if (stencil_pass != mask) {
430 spe_release_register(f, stencil_pass);
431 }
432
433 /* If all of the stencil operations were KEEP or the stencil write mask was
434 * zero, "stencil_src" will still be set to "stencil". In this case
435 * release the "face_stencil" register. Otherwise apply the stencil write
436 * mask to select bits from the calculated stencil value and the previous
437 * stencil value.
438 */
439 if (stencil_src == stencil) {
440 spe_release_register(f, face_stencil);
441 } else if (dsa->stencil[face].write_mask != 0x0ff) {
442 int tmp = spe_allocate_available_register(f);
443
444 spe_il(f, tmp, dsa->stencil[face].write_mask);
445 spe_selb(f, stencil_src, stencil, stencil_src, tmp);
446
447 spe_release_register(f, tmp);
448 }
449
450 return stencil_src;
451 }
452
453
454 void
455 cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa)
456 {
457 struct pipe_depth_stencil_alpha_state *const dsa = &cdsa->base;
458 struct spe_function *const f = &cdsa->code;
459
460 /* This code generates a maximum of 6 (alpha test) + 3 (depth test)
461 * + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions. Round
462 * up to 64 to make it a happy power-of-two.
463 */
464 spe_init_func(f, 4 * 64);
465
466
467 /* Allocate registers for the function's input parameters. Cleverly (and
468 * clever code is usually dangerous, but I couldn't resist) the generated
469 * function returns a structure. Returned structures start with register
470 * 3, and the structure fields are ordered to match up exactly with the
471 * input parameters.
472 */
473 int mask = spe_allocate_register(f, 3);
474 int depth = spe_allocate_register(f, 4);
475 int stencil = spe_allocate_register(f, 5);
476 int zvals = spe_allocate_register(f, 6);
477 int frag_a = spe_allocate_register(f, 7);
478 int facing = spe_allocate_register(f, 8);
479
480 int depth_mask = spe_allocate_available_register(f);
481
482 boolean depth_complement;
483
484
485 emit_alpha_test(dsa, f, mask, frag_a);
486
487 depth_complement = emit_depth_test(dsa, f, depth_mask, depth, zvals);
488
489 if (dsa->stencil[0].enabled) {
490 const int front_depth_pass = spe_allocate_available_register(f);
491 int front_stencil = emit_stencil_test(dsa, 0, f, mask,
492 depth_mask, depth_complement,
493 stencil, front_depth_pass);
494
495 if (dsa->stencil[1].enabled) {
496 const int back_depth_pass = spe_allocate_available_register(f);
497 int back_stencil = emit_stencil_test(dsa, 1, f, mask,
498 depth_mask, depth_complement,
499 stencil, back_depth_pass);
500
501 /* If the front facing stencil value and the back facing stencil
502 * value are stored in the same register, there is no need to select
503 * a value based on the facing. This can happen if the stencil value
504 * was not modified due to the write masks being zero, the stencil
505 * operations being KEEP, etc.
506 */
507 if (front_stencil != back_stencil) {
508 spe_selb(f, stencil, back_stencil, front_stencil, facing);
509 }
510
511 if (back_stencil != stencil) {
512 spe_release_register(f, back_stencil);
513 }
514
515 if (front_stencil != stencil) {
516 spe_release_register(f, front_stencil);
517 }
518
519 spe_selb(f, mask, back_depth_pass, front_depth_pass, facing);
520
521 spe_release_register(f, back_depth_pass);
522 } else {
523 if (front_stencil != stencil) {
524 spe_or(f, stencil, front_stencil, front_stencil);
525 spe_release_register(f, front_stencil);
526 }
527 spe_or(f, mask, front_depth_pass, front_depth_pass);
528 }
529
530 spe_release_register(f, front_depth_pass);
531 } else if (dsa->depth.enabled) {
532 if (depth_complement) {
533 spe_andc(f, mask, mask, depth_mask);
534 } else {
535 spe_and(f, mask, mask, depth_mask);
536 }
537 }
538
539 if (dsa->depth.writemask) {
540 spe_selb(f, depth, depth, zvals, mask);
541 }
542
543 spe_bi(f, 0, 0, 0);
544
545
546 #if 0
547 {
548 const uint32_t *p = f->store;
549 unsigned i;
550
551 printf("# alpha (%sabled)\n",
552 (dsa->alpha.enabled) ? "en" : "dis");
553 printf("# func: %u\n", dsa->alpha.func);
554 printf("# ref: %.2f\n", dsa->alpha.ref);
555
556 printf("# depth (%sabled)\n",
557 (dsa->depth.enabled) ? "en" : "dis");
558 printf("# func: %u\n", dsa->depth.func);
559
560 for (i = 0; i < 2; i++) {
561 printf("# %s stencil (%sabled)\n",
562 (i == 0) ? "front" : "back",
563 (dsa->stencil[i].enabled) ? "en" : "dis");
564
565 printf("# func: %u\n", dsa->stencil[i].func);
566 printf("# op (sf, zf, zp): %u %u %u\n",
567 dsa->stencil[i].fail_op,
568 dsa->stencil[i].zfail_op,
569 dsa->stencil[i].zpass_op);
570 printf("# ref value / value mask / write mask: %02x %02x %02x\n",
571 dsa->stencil[i].ref_value,
572 dsa->stencil[i].value_mask,
573 dsa->stencil[i].write_mask);
574 }
575
576 printf("\t.text\n");
577 for (/* empty */; p < f->csr; p++) {
578 printf("\t.long\t0x%04x\n", *p);
579 }
580 fflush(stdout);
581 }
582 #endif
583 }
584
585
586 /**
587 * \note Emits a maximum of 3 instructions
588 */
589 static int
590 emit_alpha_factor_calculation(struct spe_function *f,
591 unsigned factor,
592 int src_alpha, int dst_alpha, int const_alpha)
593 {
594 int factor_reg;
595 int tmp;
596
597
598 switch (factor) {
599 case PIPE_BLENDFACTOR_ONE:
600 factor_reg = -1;
601 break;
602
603 case PIPE_BLENDFACTOR_SRC_ALPHA:
604 factor_reg = spe_allocate_available_register(f);
605
606 spe_or(f, factor_reg, src_alpha, src_alpha);
607 break;
608
609 case PIPE_BLENDFACTOR_DST_ALPHA:
610 factor_reg = dst_alpha;
611 break;
612
613 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
614 factor_reg = -1;
615 break;
616
617 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
618 factor_reg = spe_allocate_available_register(f);
619
620 tmp = spe_allocate_available_register(f);
621 spe_il(f, tmp, 1);
622 spe_cuflt(f, tmp, tmp, 0);
623 spe_fs(f, factor_reg, tmp, const_alpha);
624 spe_release_register(f, tmp);
625 break;
626
627 case PIPE_BLENDFACTOR_CONST_ALPHA:
628 factor_reg = const_alpha;
629 break;
630
631 case PIPE_BLENDFACTOR_ZERO:
632 factor_reg = -1;
633 break;
634
635 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
636 tmp = spe_allocate_available_register(f);
637 factor_reg = spe_allocate_available_register(f);
638
639 spe_il(f, tmp, 1);
640 spe_cuflt(f, tmp, tmp, 0);
641 spe_fs(f, factor_reg, tmp, src_alpha);
642
643 spe_release_register(f, tmp);
644 break;
645
646 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
647 tmp = spe_allocate_available_register(f);
648 factor_reg = spe_allocate_available_register(f);
649
650 spe_il(f, tmp, 1);
651 spe_cuflt(f, tmp, tmp, 0);
652 spe_fs(f, factor_reg, tmp, dst_alpha);
653
654 spe_release_register(f, tmp);
655 break;
656
657 case PIPE_BLENDFACTOR_SRC1_ALPHA:
658 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
659 default:
660 assert(0);
661 factor_reg = -1;
662 break;
663 }
664
665 return factor_reg;
666 }
667
668
669 /**
670 * \note Emits a maximum of 6 instructions
671 */
672 static void
673 emit_color_factor_calculation(struct spe_function *f,
674 unsigned sF, unsigned mask,
675 const int *src,
676 const int *dst,
677 const int *const_color,
678 int *factor)
679 {
680 int tmp;
681 unsigned i;
682
683
684 factor[0] = -1;
685 factor[1] = -1;
686 factor[2] = -1;
687 factor[3] = -1;
688
689 switch (sF) {
690 case PIPE_BLENDFACTOR_ONE:
691 break;
692
693 case PIPE_BLENDFACTOR_SRC_COLOR:
694 for (i = 0; i < 3; ++i) {
695 if ((mask & (1U << i)) != 0) {
696 factor[i] = spe_allocate_available_register(f);
697 spe_or(f, factor[i], src[i], src[i]);
698 }
699 }
700 break;
701
702 case PIPE_BLENDFACTOR_SRC_ALPHA:
703 factor[0] = spe_allocate_available_register(f);
704 factor[1] = factor[0];
705 factor[2] = factor[0];
706
707 spe_or(f, factor[0], src[3], src[3]);
708 break;
709
710 case PIPE_BLENDFACTOR_DST_ALPHA:
711 factor[0] = dst[3];
712 factor[1] = dst[3];
713 factor[2] = dst[3];
714 break;
715
716 case PIPE_BLENDFACTOR_DST_COLOR:
717 factor[0] = dst[0];
718 factor[1] = dst[1];
719 factor[2] = dst[2];
720 break;
721
722 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
723 tmp = spe_allocate_available_register(f);
724 factor[0] = spe_allocate_available_register(f);
725 factor[1] = factor[0];
726 factor[2] = factor[0];
727
728 /* Alpha saturate means min(As, 1-Ad).
729 */
730 spe_il(f, tmp, 1);
731 spe_cuflt(f, tmp, tmp, 0);
732 spe_fs(f, tmp, tmp, dst[3]);
733 spe_fcgt(f, factor[0], tmp, src[3]);
734 spe_selb(f, factor[0], src[3], tmp, factor[0]);
735
736 spe_release_register(f, tmp);
737 break;
738
739 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
740 tmp = spe_allocate_available_register(f);
741 spe_il(f, tmp, 1);
742 spe_cuflt(f, tmp, tmp, 0);
743
744 for (i = 0; i < 3; i++) {
745 factor[i] = spe_allocate_available_register(f);
746
747 spe_fs(f, factor[i], tmp, const_color[i]);
748 }
749 spe_release_register(f, tmp);
750 break;
751
752 case PIPE_BLENDFACTOR_CONST_COLOR:
753 for (i = 0; i < 3; i++) {
754 factor[i] = const_color[i];
755 }
756 break;
757
758 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
759 factor[0] = spe_allocate_available_register(f);
760 factor[1] = factor[0];
761 factor[2] = factor[0];
762
763 tmp = spe_allocate_available_register(f);
764 spe_il(f, tmp, 1);
765 spe_cuflt(f, tmp, tmp, 0);
766 spe_fs(f, factor[0], tmp, const_color[3]);
767 spe_release_register(f, tmp);
768 break;
769
770 case PIPE_BLENDFACTOR_CONST_ALPHA:
771 factor[0] = const_color[3];
772 factor[1] = factor[0];
773 factor[2] = factor[0];
774 break;
775
776 case PIPE_BLENDFACTOR_ZERO:
777 break;
778
779 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
780 tmp = spe_allocate_available_register(f);
781
782 spe_il(f, tmp, 1);
783 spe_cuflt(f, tmp, tmp, 0);
784
785 for (i = 0; i < 3; ++i) {
786 if ((mask & (1U << i)) != 0) {
787 factor[i] = spe_allocate_available_register(f);
788 spe_fs(f, factor[i], tmp, src[i]);
789 }
790 }
791
792 spe_release_register(f, tmp);
793 break;
794
795 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
796 tmp = spe_allocate_available_register(f);
797 factor[0] = spe_allocate_available_register(f);
798 factor[1] = factor[0];
799 factor[2] = factor[0];
800
801 spe_il(f, tmp, 1);
802 spe_cuflt(f, tmp, tmp, 0);
803 spe_fs(f, factor[0], tmp, src[3]);
804
805 spe_release_register(f, tmp);
806 break;
807
808 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
809 tmp = spe_allocate_available_register(f);
810 factor[0] = spe_allocate_available_register(f);
811 factor[1] = factor[0];
812 factor[2] = factor[0];
813
814 spe_il(f, tmp, 1);
815 spe_cuflt(f, tmp, tmp, 0);
816 spe_fs(f, factor[0], tmp, dst[3]);
817
818 spe_release_register(f, tmp);
819 break;
820
821 case PIPE_BLENDFACTOR_INV_DST_COLOR:
822 tmp = spe_allocate_available_register(f);
823
824 spe_il(f, tmp, 1);
825 spe_cuflt(f, tmp, tmp, 0);
826
827 for (i = 0; i < 3; ++i) {
828 if ((mask & (1U << i)) != 0) {
829 factor[i] = spe_allocate_available_register(f);
830 spe_fs(f, factor[i], tmp, dst[i]);
831 }
832 }
833
834 spe_release_register(f, tmp);
835 break;
836
837 case PIPE_BLENDFACTOR_SRC1_COLOR:
838 case PIPE_BLENDFACTOR_SRC1_ALPHA:
839 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
840 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
841 default:
842 assert(0);
843 }
844 }
845
846
847 static void
848 emit_blend_calculation(struct spe_function *f,
849 unsigned func, unsigned sF, unsigned dF,
850 int src, int src_factor, int dst, int dst_factor)
851 {
852 int tmp = spe_allocate_available_register(f);
853
854 switch (func) {
855 case PIPE_BLEND_ADD:
856 if (sF == PIPE_BLENDFACTOR_ONE) {
857 if (dF == PIPE_BLENDFACTOR_ZERO) {
858 /* Do nothing. */
859 } else if (dF == PIPE_BLENDFACTOR_ONE) {
860 spe_fa(f, src, src, dst);
861 }
862 } else if (sF == PIPE_BLENDFACTOR_ZERO) {
863 if (dF == PIPE_BLENDFACTOR_ZERO) {
864 spe_il(f, src, 0);
865 } else if (dF == PIPE_BLENDFACTOR_ONE) {
866 spe_or(f, src, dst, dst);
867 } else {
868 spe_fm(f, src, dst, dst_factor);
869 }
870 } else if (dF == PIPE_BLENDFACTOR_ZERO) {
871 spe_fm(f, src, src, src_factor);
872 } else {
873 spe_fm(f, tmp, dst, dst_factor);
874 spe_fma(f, src, src, src_factor, tmp);
875 }
876 break;
877
878 case PIPE_BLEND_SUBTRACT:
879 if (sF == PIPE_BLENDFACTOR_ONE) {
880 if (dF == PIPE_BLENDFACTOR_ZERO) {
881 /* Do nothing. */
882 } else if (dF == PIPE_BLENDFACTOR_ONE) {
883 spe_fs(f, src, src, dst);
884 }
885 } else if (sF == PIPE_BLENDFACTOR_ZERO) {
886 if (dF == PIPE_BLENDFACTOR_ZERO) {
887 spe_il(f, src, 0);
888 } else if (dF == PIPE_BLENDFACTOR_ONE) {
889 spe_il(f, tmp, 0);
890 spe_fs(f, src, tmp, dst);
891 } else {
892 spe_fm(f, src, dst, dst_factor);
893 }
894 } else if (dF == PIPE_BLENDFACTOR_ZERO) {
895 spe_fm(f, src, src, src_factor);
896 } else {
897 spe_fm(f, tmp, dst, dst_factor);
898 spe_fms(f, src, src, src_factor, tmp);
899 }
900 break;
901
902 case PIPE_BLEND_REVERSE_SUBTRACT:
903 if (sF == PIPE_BLENDFACTOR_ONE) {
904 if (dF == PIPE_BLENDFACTOR_ZERO) {
905 spe_il(f, tmp, 0);
906 spe_fs(f, src, tmp, src);
907 } else if (dF == PIPE_BLENDFACTOR_ONE) {
908 spe_fs(f, src, dst, src);
909 }
910 } else if (sF == PIPE_BLENDFACTOR_ZERO) {
911 if (dF == PIPE_BLENDFACTOR_ZERO) {
912 spe_il(f, src, 0);
913 } else if (dF == PIPE_BLENDFACTOR_ONE) {
914 spe_or(f, src, dst, dst);
915 } else {
916 spe_fm(f, src, dst, dst_factor);
917 }
918 } else if (dF == PIPE_BLENDFACTOR_ZERO) {
919 spe_fm(f, src, src, src_factor);
920 } else {
921 spe_fm(f, tmp, src, src_factor);
922 spe_fms(f, src, src, dst_factor, tmp);
923 }
924 break;
925
926 case PIPE_BLEND_MIN:
927 spe_cgt(f, tmp, src, dst);
928 spe_selb(f, src, src, dst, tmp);
929 break;
930
931 case PIPE_BLEND_MAX:
932 spe_cgt(f, tmp, src, dst);
933 spe_selb(f, src, dst, src, tmp);
934 break;
935
936 default:
937 assert(0);
938 }
939
940 spe_release_register(f, tmp);
941 }
942
943
944 /**
945 * Generate code to perform alpha blending on the SPE
946 */
947 void
948 cell_generate_alpha_blend(struct cell_blend_state *cb)
949 {
950 struct pipe_blend_state *const b = &cb->base;
951 struct spe_function *const f = &cb->code;
952
953 /* This code generates a maximum of 3 (source alpha factor)
954 * + 3 (destination alpha factor) + (3 * 6) (source color factor)
955 * + (3 * 6) (destination color factor) + (4 * 2) (blend equation)
956 * + 4 (fragment mask) + 1 (return) = 55 instlructions. Round up to 64 to
957 * make it a happy power-of-two.
958 */
959 spe_init_func(f, 4 * 64);
960
961
962 const int frag[4] = {
963 spe_allocate_register(f, 3),
964 spe_allocate_register(f, 4),
965 spe_allocate_register(f, 5),
966 spe_allocate_register(f, 6),
967 };
968 const int pixel[4] = {
969 spe_allocate_register(f, 7),
970 spe_allocate_register(f, 8),
971 spe_allocate_register(f, 9),
972 spe_allocate_register(f, 10),
973 };
974 const int const_color[4] = {
975 spe_allocate_register(f, 11),
976 spe_allocate_register(f, 12),
977 spe_allocate_register(f, 13),
978 spe_allocate_register(f, 14),
979 };
980 unsigned func[4];
981 unsigned sF[4];
982 unsigned dF[4];
983 unsigned i;
984 int src_factor[4];
985 int dst_factor[4];
986
987
988 /* Does the selected blend mode make use of the source / destination
989 * color (RGB) blend factors?
990 */
991 boolean need_color_factor = b->blend_enable
992 && (b->rgb_func != PIPE_BLEND_MIN)
993 && (b->rgb_func != PIPE_BLEND_MAX);
994
995 /* Does the selected blend mode make use of the source / destination
996 * alpha blend factors?
997 */
998 boolean need_alpha_factor = b->blend_enable
999 && (b->alpha_func != PIPE_BLEND_MIN)
1000 && (b->alpha_func != PIPE_BLEND_MAX);
1001
1002
1003 if (b->blend_enable) {
1004 sF[0] = b->rgb_src_factor;
1005 sF[1] = sF[0];
1006 sF[2] = sF[0];
1007 switch (b->alpha_src_factor & 0x0f) {
1008 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
1009 sF[3] = PIPE_BLENDFACTOR_ONE;
1010 break;
1011 case PIPE_BLENDFACTOR_SRC_COLOR:
1012 case PIPE_BLENDFACTOR_DST_COLOR:
1013 case PIPE_BLENDFACTOR_CONST_COLOR:
1014 case PIPE_BLENDFACTOR_SRC1_COLOR:
1015 sF[3] = b->alpha_src_factor + 1;
1016 break;
1017 default:
1018 sF[3] = b->alpha_src_factor;
1019 }
1020
1021 dF[0] = b->rgb_dst_factor;
1022 dF[1] = dF[0];
1023 dF[2] = dF[0];
1024 switch (b->alpha_dst_factor & 0x0f) {
1025 case PIPE_BLENDFACTOR_SRC_COLOR:
1026 case PIPE_BLENDFACTOR_DST_COLOR:
1027 case PIPE_BLENDFACTOR_CONST_COLOR:
1028 case PIPE_BLENDFACTOR_SRC1_COLOR:
1029 dF[3] = b->alpha_dst_factor + 1;
1030 break;
1031 default:
1032 dF[3] = b->alpha_dst_factor;
1033 }
1034
1035 func[0] = b->rgb_func;
1036 func[1] = func[0];
1037 func[2] = func[0];
1038 func[3] = b->alpha_func;
1039 } else {
1040 sF[0] = PIPE_BLENDFACTOR_ONE;
1041 sF[1] = PIPE_BLENDFACTOR_ONE;
1042 sF[2] = PIPE_BLENDFACTOR_ONE;
1043 sF[3] = PIPE_BLENDFACTOR_ONE;
1044 dF[0] = PIPE_BLENDFACTOR_ZERO;
1045 dF[1] = PIPE_BLENDFACTOR_ZERO;
1046 dF[2] = PIPE_BLENDFACTOR_ZERO;
1047 dF[3] = PIPE_BLENDFACTOR_ZERO;
1048
1049 func[0] = PIPE_BLEND_ADD;
1050 func[1] = PIPE_BLEND_ADD;
1051 func[2] = PIPE_BLEND_ADD;
1052 func[3] = PIPE_BLEND_ADD;
1053 }
1054
1055
1056 /* If alpha writing is enabled and the alpha blend mode requires use of
1057 * the alpha factor, calculate the alpha factor.
1058 */
1059 if (((b->colormask & 8) != 0) && need_alpha_factor) {
1060 src_factor[3] = emit_alpha_factor_calculation(f, sF[3], const_color[3],
1061 frag[3], pixel[3]);
1062
1063 /* If the alpha destination blend factor is the same as the alpha source
1064 * blend factor, re-use the previously calculated value.
1065 */
1066 dst_factor[3] = (dF[3] == sF[3])
1067 ? src_factor[3]
1068 : emit_alpha_factor_calculation(f, dF[3], const_color[3],
1069 frag[3], pixel[3]);
1070 }
1071
1072
1073 if (sF[0] == sF[3]) {
1074 src_factor[0] = src_factor[3];
1075 src_factor[1] = src_factor[3];
1076 src_factor[2] = src_factor[3];
1077 } else if (sF[0] == dF[3]) {
1078 src_factor[0] = dst_factor[3];
1079 src_factor[1] = dst_factor[3];
1080 src_factor[2] = dst_factor[3];
1081 } else if (need_color_factor) {
1082 emit_color_factor_calculation(f,
1083 b->rgb_src_factor,
1084 b->colormask,
1085 frag, pixel, const_color, src_factor);
1086 }
1087
1088
1089 if (dF[0] == sF[3]) {
1090 dst_factor[0] = src_factor[3];
1091 dst_factor[1] = src_factor[3];
1092 dst_factor[2] = src_factor[3];
1093 } else if (dF[0] == dF[3]) {
1094 dst_factor[0] = dst_factor[3];
1095 dst_factor[1] = dst_factor[3];
1096 dst_factor[2] = dst_factor[3];
1097 } else if (dF[0] == sF[0]) {
1098 dst_factor[0] = src_factor[0];
1099 dst_factor[1] = src_factor[1];
1100 dst_factor[2] = src_factor[2];
1101 } else if (need_color_factor) {
1102 emit_color_factor_calculation(f,
1103 b->rgb_dst_factor,
1104 b->colormask,
1105 frag, pixel, const_color, dst_factor);
1106 }
1107
1108
1109
1110 for (i = 0; i < 4; ++i) {
1111 if ((b->colormask & (1U << i)) != 0) {
1112 emit_blend_calculation(f,
1113 func[i], sF[i], dF[i],
1114 frag[i], src_factor[i],
1115 pixel[i], dst_factor[i]);
1116 }
1117 }
1118
1119 spe_bi(f, 0, 0, 0);
1120
1121 #if 0
1122 {
1123 const uint32_t *p = f->store;
1124
1125 printf("# %u instructions\n", f->csr - f->store);
1126 printf("# blend (%sabled)\n",
1127 (cb->base.blend_enable) ? "en" : "dis");
1128 printf("# RGB func / sf / df: %u %u %u\n",
1129 cb->base.rgb_func,
1130 cb->base.rgb_src_factor,
1131 cb->base.rgb_dst_factor);
1132 printf("# ALP func / sf / df: %u %u %u\n",
1133 cb->base.alpha_func,
1134 cb->base.alpha_src_factor,
1135 cb->base.alpha_dst_factor);
1136
1137 printf("\t.text\n");
1138 for (/* empty */; p < f->csr; p++) {
1139 printf("\t.long\t0x%04x\n", *p);
1140 }
1141 fflush(stdout);
1142 }
1143 #endif
1144 }
1145
1146
1147 int PC_OFFSET(const struct spe_function *f, const void *d)
1148 {
1149 const intptr_t pc = (intptr_t) f->csr;
1150 const intptr_t ea = ~0x0f & (intptr_t) d;
1151
1152 return (ea - pc) >> 2;
1153 }
1154
1155
1156 /**
1157 * Generate code to perform color conversion and logic op
1158 *
1159 * \bug
1160 * The code generated by this function should also perform dithering.
1161 *
1162 * \bug
1163 * The code generated by this function should also perform color-write
1164 * masking.
1165 *
1166 * \bug
1167 * Only two framebuffer formats are supported at this time.
1168 */
1169 void
1170 cell_generate_logic_op(struct spe_function *f,
1171 const struct pipe_blend_state *blend,
1172 struct pipe_surface *surf)
1173 {
1174 const unsigned logic_op = (blend->logicop_enable)
1175 ? blend->logicop_func : PIPE_LOGICOP_COPY;
1176
1177 /* This code generates a maximum of 37 instructions. An additional 32
1178 * bytes (equiv. to 8 instructions) are needed for data storage. Round up
1179 * to 64 to make it a happy power-of-two.
1180 */
1181 spe_init_func(f, 4 * 64);
1182
1183
1184 /* Pixel colors in framebuffer format in AoS layout.
1185 */
1186 const int pixel[4] = {
1187 spe_allocate_register(f, 3),
1188 spe_allocate_register(f, 4),
1189 spe_allocate_register(f, 5),
1190 spe_allocate_register(f, 6),
1191 };
1192
1193 /* Fragment colors stored as floats in SoA layout.
1194 */
1195 const int frag[4] = {
1196 spe_allocate_register(f, 7),
1197 spe_allocate_register(f, 8),
1198 spe_allocate_register(f, 9),
1199 spe_allocate_register(f, 10),
1200 };
1201
1202 const int mask = spe_allocate_register(f, 11);
1203
1204
1205 /* Short-circuit the noop and invert cases.
1206 */
1207 if ((logic_op == PIPE_LOGICOP_NOOP) || (blend->colormask == 0)) {
1208 spe_bi(f, 0, 0, 0);
1209 return;
1210 } else if (logic_op == PIPE_LOGICOP_INVERT) {
1211 spe_nor(f, pixel[0], pixel[0], pixel[0]);
1212 spe_nor(f, pixel[1], pixel[1], pixel[1]);
1213 spe_nor(f, pixel[2], pixel[2], pixel[2]);
1214 spe_nor(f, pixel[3], pixel[3], pixel[3]);
1215 spe_bi(f, 0, 0, 0);
1216 return;
1217 }
1218
1219
1220 const int tmp[4] = {
1221 spe_allocate_available_register(f),
1222 spe_allocate_available_register(f),
1223 spe_allocate_available_register(f),
1224 spe_allocate_available_register(f),
1225 };
1226
1227 const int shuf_xpose_hi = spe_allocate_available_register(f);
1228 const int shuf_xpose_lo = spe_allocate_available_register(f);
1229 const int shuf_color = spe_allocate_available_register(f);
1230
1231
1232 /* Pointer to the begining of the function's private data area.
1233 */
1234 uint32_t *const data = ((uint32_t *) f->store) + (64 - 8);
1235
1236
1237 /* Convert fragment colors to framebuffer format in AoS layout.
1238 */
1239 switch (surf->format) {
1240 case PIPE_FORMAT_A8R8G8B8_UNORM:
1241 data[0] = 0x00010203;
1242 data[1] = 0x10111213;
1243 data[2] = 0x04050607;
1244 data[3] = 0x14151617;
1245 data[4] = 0x0c000408;
1246 data[5] = 0x80808080;
1247 data[6] = 0x80808080;
1248 data[7] = 0x80808080;
1249 break;
1250 case PIPE_FORMAT_B8G8R8A8_UNORM:
1251 data[0] = 0x03020100;
1252 data[1] = 0x13121110;
1253 data[2] = 0x07060504;
1254 data[3] = 0x17161514;
1255 data[4] = 0x0804000c;
1256 data[5] = 0x80808080;
1257 data[6] = 0x80808080;
1258 data[7] = 0x80808080;
1259 break;
1260 default:
1261 fprintf(stderr, "CELL: Bad pixel format in cell_generate_logic_op()");
1262 ASSERT(0);
1263 }
1264
1265 spe_ilh(f, tmp[0], 0x0808);
1266 spe_lqr(f, shuf_xpose_hi, PC_OFFSET(f, data+0));
1267 spe_lqr(f, shuf_color, PC_OFFSET(f, data+4));
1268 spe_a(f, shuf_xpose_lo, shuf_xpose_hi, tmp[0]);
1269
1270 spe_shufb(f, tmp[0], frag[0], frag[2], shuf_xpose_hi);
1271 spe_shufb(f, tmp[1], frag[0], frag[2], shuf_xpose_lo);
1272 spe_shufb(f, tmp[2], frag[1], frag[3], shuf_xpose_hi);
1273 spe_shufb(f, tmp[3], frag[1], frag[3], shuf_xpose_lo);
1274
1275 spe_shufb(f, frag[0], tmp[0], tmp[2], shuf_xpose_hi);
1276 spe_shufb(f, frag[1], tmp[0], tmp[2], shuf_xpose_lo);
1277 spe_shufb(f, frag[2], tmp[1], tmp[3], shuf_xpose_hi);
1278 spe_shufb(f, frag[3], tmp[1], tmp[3], shuf_xpose_lo);
1279
1280 spe_cfltu(f, frag[0], frag[0], 32);
1281 spe_cfltu(f, frag[1], frag[1], 32);
1282 spe_cfltu(f, frag[2], frag[2], 32);
1283 spe_cfltu(f, frag[3], frag[3], 32);
1284
1285 spe_shufb(f, frag[0], frag[0], pixel[0], shuf_color);
1286 spe_shufb(f, frag[1], frag[1], pixel[1], shuf_color);
1287 spe_shufb(f, frag[2], frag[2], pixel[2], shuf_color);
1288 spe_shufb(f, frag[3], frag[3], pixel[3], shuf_color);
1289
1290
1291 /* If logic op is enabled, perform the requested logical operation on the
1292 * converted fragment colors and the pixel colors.
1293 */
1294 switch (logic_op) {
1295 case PIPE_LOGICOP_CLEAR:
1296 spe_il(f, frag[0], 0);
1297 spe_il(f, frag[1], 0);
1298 spe_il(f, frag[2], 0);
1299 spe_il(f, frag[3], 0);
1300 break;
1301 case PIPE_LOGICOP_NOR:
1302 spe_nor(f, frag[0], frag[0], pixel[0]);
1303 spe_nor(f, frag[1], frag[1], pixel[1]);
1304 spe_nor(f, frag[2], frag[2], pixel[2]);
1305 spe_nor(f, frag[3], frag[3], pixel[3]);
1306 break;
1307 case PIPE_LOGICOP_AND_INVERTED:
1308 spe_andc(f, frag[0], pixel[0], frag[0]);
1309 spe_andc(f, frag[1], pixel[1], frag[1]);
1310 spe_andc(f, frag[2], pixel[2], frag[2]);
1311 spe_andc(f, frag[3], pixel[3], frag[3]);
1312 break;
1313 case PIPE_LOGICOP_COPY_INVERTED:
1314 spe_nor(f, frag[0], frag[0], frag[0]);
1315 spe_nor(f, frag[1], frag[1], frag[1]);
1316 spe_nor(f, frag[2], frag[2], frag[2]);
1317 spe_nor(f, frag[3], frag[3], frag[3]);
1318 break;
1319 case PIPE_LOGICOP_AND_REVERSE:
1320 spe_andc(f, frag[0], frag[0], pixel[0]);
1321 spe_andc(f, frag[1], frag[1], pixel[1]);
1322 spe_andc(f, frag[2], frag[2], pixel[2]);
1323 spe_andc(f, frag[3], frag[3], pixel[3]);
1324 break;
1325 case PIPE_LOGICOP_XOR:
1326 spe_xor(f, frag[0], frag[0], pixel[0]);
1327 spe_xor(f, frag[1], frag[1], pixel[1]);
1328 spe_xor(f, frag[2], frag[2], pixel[2]);
1329 spe_xor(f, frag[3], frag[3], pixel[3]);
1330 break;
1331 case PIPE_LOGICOP_NAND:
1332 spe_nand(f, frag[0], frag[0], pixel[0]);
1333 spe_nand(f, frag[1], frag[1], pixel[1]);
1334 spe_nand(f, frag[2], frag[2], pixel[2]);
1335 spe_nand(f, frag[3], frag[3], pixel[3]);
1336 break;
1337 case PIPE_LOGICOP_AND:
1338 spe_and(f, frag[0], frag[0], pixel[0]);
1339 spe_and(f, frag[1], frag[1], pixel[1]);
1340 spe_and(f, frag[2], frag[2], pixel[2]);
1341 spe_and(f, frag[3], frag[3], pixel[3]);
1342 break;
1343 case PIPE_LOGICOP_EQUIV:
1344 spe_eqv(f, frag[0], frag[0], pixel[0]);
1345 spe_eqv(f, frag[1], frag[1], pixel[1]);
1346 spe_eqv(f, frag[2], frag[2], pixel[2]);
1347 spe_eqv(f, frag[3], frag[3], pixel[3]);
1348 break;
1349 case PIPE_LOGICOP_OR_INVERTED:
1350 spe_orc(f, frag[0], pixel[0], frag[0]);
1351 spe_orc(f, frag[1], pixel[1], frag[1]);
1352 spe_orc(f, frag[2], pixel[2], frag[2]);
1353 spe_orc(f, frag[3], pixel[3], frag[3]);
1354 break;
1355 case PIPE_LOGICOP_COPY:
1356 break;
1357 case PIPE_LOGICOP_OR_REVERSE:
1358 spe_orc(f, frag[0], frag[0], pixel[0]);
1359 spe_orc(f, frag[1], frag[1], pixel[1]);
1360 spe_orc(f, frag[2], frag[2], pixel[2]);
1361 spe_orc(f, frag[3], frag[3], pixel[3]);
1362 break;
1363 case PIPE_LOGICOP_OR:
1364 spe_or(f, frag[0], frag[0], pixel[0]);
1365 spe_or(f, frag[1], frag[1], pixel[1]);
1366 spe_or(f, frag[2], frag[2], pixel[2]);
1367 spe_or(f, frag[3], frag[3], pixel[3]);
1368 break;
1369 case PIPE_LOGICOP_SET:
1370 spe_il(f, frag[0], ~0);
1371 spe_il(f, frag[1], ~0);
1372 spe_il(f, frag[2], ~0);
1373 spe_il(f, frag[3], ~0);
1374 break;
1375
1376 /* These two cases are short-circuited above.
1377 */
1378 case PIPE_LOGICOP_INVERT:
1379 case PIPE_LOGICOP_NOOP:
1380 default:
1381 assert(0);
1382 }
1383
1384
1385 /* Apply fragment mask.
1386 */
1387 spe_ilh(f, tmp[0], 0x0000);
1388 spe_ilh(f, tmp[1], 0x0404);
1389 spe_ilh(f, tmp[2], 0x0808);
1390 spe_ilh(f, tmp[3], 0x0c0c);
1391
1392 spe_shufb(f, tmp[0], mask, mask, tmp[0]);
1393 spe_shufb(f, tmp[1], mask, mask, tmp[1]);
1394 spe_shufb(f, tmp[2], mask, mask, tmp[2]);
1395 spe_shufb(f, tmp[3], mask, mask, tmp[3]);
1396
1397 spe_selb(f, pixel[0], pixel[0], frag[0], tmp[0]);
1398 spe_selb(f, pixel[1], pixel[1], frag[1], tmp[1]);
1399 spe_selb(f, pixel[2], pixel[2], frag[2], tmp[2]);
1400 spe_selb(f, pixel[3], pixel[3], frag[3], tmp[3]);
1401
1402 spe_bi(f, 0, 0, 0);
1403
1404 #if 0
1405 {
1406 const uint32_t *p = f->store;
1407 unsigned i;
1408
1409 printf("# %u instructions\n", f->csr - f->store);
1410
1411 printf("\t.text\n");
1412 for (i = 0; i < 64; i++) {
1413 printf("\t.long\t0x%04x\n", p[i]);
1414 }
1415 fflush(stdout);
1416 }
1417 #endif
1418 }