Merge remote branch 'origin/7.8'
[mesa.git] / src / gallium / drivers / cell / ppu / cell_state_per_fragment.c
1 /*
2 * (C) Copyright IBM Corporation 2008
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file
27 * Generate code to perform all per-fragment operations.
28 *
29 * Code generated by these functions performs alpha, depth, and stencil
30 * testing as well as alpha blending.
31 *
32 * \note
33 * Occlusion query is not supported, but this is the right place to add that
34 * support.
35 *
36 * \author Ian Romanick <idr@us.ibm.com>
37 */
38
39 #include "pipe/p_defines.h"
40 #include "pipe/p_state.h"
41
42 #include "cell_context.h"
43
44 #include "rtasm/rtasm_ppc_spe.h"
45
46
47 /**
48 * Generate code to perform alpha testing.
49 *
50 * The code generated by this function uses the register specificed by
51 * \c mask as both an input and an output.
52 *
53 * \param dsa Current alpha-test state
54 * \param f Function to which code should be appended
55 * \param mask Index of register containing active fragment mask
56 * \param alphas Index of register containing per-fragment alpha values
57 *
58 * \note Emits a maximum of 6 instructions.
59 */
60 static void
61 emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa,
62 struct spe_function *f, int mask, int alphas)
63 {
64 /* If the alpha function is either NEVER or ALWAYS, there is no need to
65 * load the reference value into a register. ALWAYS is a fairly common
66 * case, and this optimization saves 2 instructions.
67 */
68 if (dsa->alpha.enabled
69 && (dsa->alpha.func != PIPE_FUNC_NEVER)
70 && (dsa->alpha.func != PIPE_FUNC_ALWAYS)) {
71 int ref = spe_allocate_available_register(f);
72 int tmp_a = spe_allocate_available_register(f);
73 int tmp_b = spe_allocate_available_register(f);
74 union {
75 float f;
76 unsigned u;
77 } ref_val;
78 boolean complement = FALSE;
79
80 ref_val.f = dsa->alpha.ref;
81
82 spe_il(f, ref, ref_val.u & 0x0000ffff);
83 spe_ilh(f, ref, ref_val.u >> 16);
84
85 switch (dsa->alpha.func) {
86 case PIPE_FUNC_NOTEQUAL:
87 complement = TRUE;
88 /* FALLTHROUGH */
89
90 case PIPE_FUNC_EQUAL:
91 spe_fceq(f, tmp_a, ref, alphas);
92 break;
93
94 case PIPE_FUNC_LEQUAL:
95 complement = TRUE;
96 /* FALLTHROUGH */
97
98 case PIPE_FUNC_GREATER:
99 spe_fcgt(f, tmp_a, ref, alphas);
100 break;
101
102 case PIPE_FUNC_LESS:
103 complement = TRUE;
104 /* FALLTHROUGH */
105
106 case PIPE_FUNC_GEQUAL:
107 spe_fcgt(f, tmp_a, ref, alphas);
108 spe_fceq(f, tmp_b, ref, alphas);
109 spe_or(f, tmp_a, tmp_b, tmp_a);
110 break;
111
112 case PIPE_FUNC_ALWAYS:
113 case PIPE_FUNC_NEVER:
114 default:
115 assert(0);
116 break;
117 }
118
119 if (complement) {
120 spe_andc(f, mask, mask, tmp_a);
121 } else {
122 spe_and(f, mask, mask, tmp_a);
123 }
124
125 spe_release_register(f, ref);
126 spe_release_register(f, tmp_a);
127 spe_release_register(f, tmp_b);
128 } else if (dsa->alpha.enabled && (dsa->alpha.func == PIPE_FUNC_NEVER)) {
129 spe_il(f, mask, 0);
130 }
131 }
132
133
134 /**
135 * Generate code to perform Z testing. Four Z values are tested at once.
136 * \param dsa Current depth-test state
137 * \param f Function to which code should be appended
138 * \param mask Index of register to contain depth-pass mask
139 * \param stored Index of register containing values from depth buffer
140 * \param calculated Index of register containing per-fragment depth values
141 *
142 * \return
143 * If the calculated depth comparison mask is the actual mask, \c FALSE is
144 * returned. If the calculated depth comparison mask is the compliment of
145 * the actual mask, \c TRUE is returned.
146 *
147 * \note Emits a maximum of 3 instructions.
148 */
149 static boolean
150 emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa,
151 struct spe_function *f, int mask, int stored, int calculated)
152 {
153 unsigned func = (dsa->depth.enabled)
154 ? dsa->depth.func : PIPE_FUNC_ALWAYS;
155 int tmp = spe_allocate_available_register(f);
156 boolean compliment = FALSE;
157
158 switch (func) {
159 case PIPE_FUNC_NEVER:
160 spe_il(f, mask, 0);
161 break;
162
163 case PIPE_FUNC_NOTEQUAL:
164 compliment = TRUE;
165 /* FALLTHROUGH */
166 case PIPE_FUNC_EQUAL:
167 spe_ceq(f, mask, calculated, stored);
168 break;
169
170 case PIPE_FUNC_LEQUAL:
171 compliment = TRUE;
172 /* FALLTHROUGH */
173 case PIPE_FUNC_GREATER:
174 spe_clgt(f, mask, calculated, stored);
175 break;
176
177 case PIPE_FUNC_LESS:
178 compliment = TRUE;
179 /* FALLTHROUGH */
180 case PIPE_FUNC_GEQUAL:
181 spe_clgt(f, mask, calculated, stored);
182 spe_ceq(f, tmp, calculated, stored);
183 spe_or(f, mask, mask, tmp);
184 break;
185
186 case PIPE_FUNC_ALWAYS:
187 spe_il(f, mask, ~0);
188 break;
189
190 default:
191 assert(0);
192 break;
193 }
194
195 spe_release_register(f, tmp);
196 return compliment;
197 }
198
199
200 /**
201 * Generate code to apply the stencil operation (after testing).
202 * \note Emits a maximum of 5 instructions.
203 *
204 * \warning
205 * Since \c out and \c in might be the same register, this routine cannot
206 * generate code that uses \c out as a temporary.
207 */
208 static void
209 emit_stencil_op(struct spe_function *f,
210 int out, int in, int mask, unsigned op, unsigned ref)
211 {
212 const int clamp = spe_allocate_available_register(f);
213 const int clamp_mask = spe_allocate_available_register(f);
214 const int result = spe_allocate_available_register(f);
215
216 switch(op) {
217 case PIPE_STENCIL_OP_KEEP:
218 assert(0);
219 case PIPE_STENCIL_OP_ZERO:
220 spe_il(f, result, 0);
221 break;
222 case PIPE_STENCIL_OP_REPLACE:
223 spe_il(f, result, ref);
224 break;
225 case PIPE_STENCIL_OP_INCR:
226 /* clamp = [0xff, 0xff, 0xff, 0xff] */
227 spe_il(f, clamp, 0x0ff);
228 /* result[i] = in[i] + 1 */
229 spe_ai(f, result, in, 1);
230 /* clamp_mask[i] = (result[i] > 0xff) */
231 spe_clgti(f, clamp_mask, result, 0x0ff);
232 /* result[i] = clamp_mask[i] ? clamp[i] : result[i] */
233 spe_selb(f, result, result, clamp, clamp_mask);
234 break;
235 case PIPE_STENCIL_OP_DECR:
236 spe_il(f, clamp, 0);
237 spe_ai(f, result, in, -1);
238
239 /* If "(s-1) < 0" in signed arithemtic, then "(s-1) > MAX" in unsigned
240 * arithmetic.
241 */
242 spe_clgti(f, clamp_mask, result, 0x0ff);
243 spe_selb(f, result, result, clamp, clamp_mask);
244 break;
245 case PIPE_STENCIL_OP_INCR_WRAP:
246 spe_ai(f, result, in, 1);
247 break;
248 case PIPE_STENCIL_OP_DECR_WRAP:
249 spe_ai(f, result, in, -1);
250 break;
251 case PIPE_STENCIL_OP_INVERT:
252 spe_nor(f, result, in, in);
253 break;
254 default:
255 assert(0);
256 }
257
258 spe_selb(f, out, in, result, mask);
259
260 spe_release_register(f, result);
261 spe_release_register(f, clamp_mask);
262 spe_release_register(f, clamp);
263 }
264
265
266 /**
267 * Generate code to do stencil test. Four pixels are tested at once.
268 * \param dsa Depth / stencil test state
269 * \param face 0 for front face, 1 for back face
270 * \param f Function to append instructions to
271 * \param mask Register containing mask of fragments passing the
272 * alpha test
273 * \param depth_mask Register containing mask of fragments passing the
274 * depth test
275 * \param depth_compliment Is \c depth_mask the compliment of the actual mask?
276 * \param stencil Register containing values from stencil buffer
277 * \param depth_pass Register to store mask of fragments passing stencil test
278 * and depth test
279 *
280 * \note
281 * Emits a maximum of 10 + (3 * 5) = 25 instructions.
282 */
283 static int
284 emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa,
285 struct pipe_stencil_ref *sr,
286 unsigned face,
287 struct spe_function *f,
288 int mask,
289 int depth_mask,
290 boolean depth_complement,
291 int stencil,
292 int depth_pass)
293 {
294 int stencil_fail = spe_allocate_available_register(f);
295 int depth_fail = spe_allocate_available_register(f);
296 int stencil_mask = spe_allocate_available_register(f);
297 int stencil_pass = spe_allocate_available_register(f);
298 int face_stencil = spe_allocate_available_register(f);
299 int stencil_src = stencil;
300 const unsigned ref = (sr->ref_value[face]
301 & dsa->stencil[face].valuemask);
302 boolean complement = FALSE;
303 int stored;
304 int tmp = spe_allocate_available_register(f);
305
306
307 if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
308 && (dsa->stencil[face].func != PIPE_FUNC_ALWAYS)
309 && (dsa->stencil[face].valuemask != 0x0ff)) {
310 stored = spe_allocate_available_register(f);
311 spe_andi(f, stored, stencil, dsa->stencil[face].valuemask);
312 } else {
313 stored = stencil;
314 }
315
316
317 switch (dsa->stencil[face].func) {
318 case PIPE_FUNC_NEVER:
319 spe_il(f, stencil_mask, 0); /* stencil_mask[0..3] = [0,0,0,0] */
320 break;
321
322 case PIPE_FUNC_NOTEQUAL:
323 complement = TRUE;
324 /* FALLTHROUGH */
325 case PIPE_FUNC_EQUAL:
326 /* stencil_mask[i] = (stored[i] == ref) */
327 spe_ceqi(f, stencil_mask, stored, ref);
328 break;
329
330 case PIPE_FUNC_LEQUAL:
331 complement = TRUE;
332 /* FALLTHROUGH */
333 case PIPE_FUNC_GREATER:
334 complement = TRUE;
335 /* stencil_mask[i] = (stored[i] > ref) */
336 spe_clgti(f, stencil_mask, stored, ref);
337 break;
338
339 case PIPE_FUNC_LESS:
340 complement = TRUE;
341 /* FALLTHROUGH */
342 case PIPE_FUNC_GEQUAL:
343 /* stencil_mask[i] = (stored[i] > ref) */
344 spe_clgti(f, stencil_mask, stored, ref);
345 /* tmp[i] = (stored[i] == ref) */
346 spe_ceqi(f, tmp, stored, ref);
347 /* stencil_mask[i] = stencil_mask[i] | tmp[i] */
348 spe_or(f, stencil_mask, stencil_mask, tmp);
349 break;
350
351 case PIPE_FUNC_ALWAYS:
352 /* See comment below. */
353 break;
354
355 default:
356 assert(0);
357 break;
358 }
359
360 if (stored != stencil) {
361 spe_release_register(f, stored);
362 }
363 spe_release_register(f, tmp);
364
365
366 /* ALWAYS is a very common stencil-test, so some effort is applied to
367 * optimize that case. The stencil-pass mask is the same as the input
368 * fragment mask. This makes the stencil-test (above) a no-op, and the
369 * input fragment mask can be "renamed" the stencil-pass mask.
370 */
371 if (dsa->stencil[face].func == PIPE_FUNC_ALWAYS) {
372 spe_release_register(f, stencil_pass);
373 stencil_pass = mask;
374 } else {
375 if (complement) {
376 spe_andc(f, stencil_pass, mask, stencil_mask);
377 } else {
378 spe_and(f, stencil_pass, mask, stencil_mask);
379 }
380 }
381
382 if (depth_complement) {
383 spe_andc(f, depth_pass, stencil_pass, depth_mask);
384 } else {
385 spe_and(f, depth_pass, stencil_pass, depth_mask);
386 }
387
388
389 /* Conditionally emit code to update the stencil value under various
390 * condititons. Note that there is no need to generate code under the
391 * following circumstances:
392 *
393 * - Stencil write mask is zero.
394 * - For stencil-fail if the stencil test is ALWAYS
395 * - For depth-fail if the stencil test is NEVER
396 * - For depth-pass if the stencil test is NEVER
397 * - Any of the 3 conditions if the operation is KEEP
398 */
399 if (dsa->stencil[face].writemask != 0) {
400 if ((dsa->stencil[face].func != PIPE_FUNC_ALWAYS)
401 && (dsa->stencil[face].fail_op != PIPE_STENCIL_OP_KEEP)) {
402 if (complement) {
403 spe_and(f, stencil_fail, mask, stencil_mask);
404 } else {
405 spe_andc(f, stencil_fail, mask, stencil_mask);
406 }
407
408 emit_stencil_op(f, face_stencil, stencil_src, stencil_fail,
409 dsa->stencil[face].fail_op,
410 sr->ref_value[face]);
411
412 stencil_src = face_stencil;
413 }
414
415 if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
416 && (dsa->stencil[face].zfail_op != PIPE_STENCIL_OP_KEEP)) {
417 if (depth_complement) {
418 spe_and(f, depth_fail, stencil_pass, depth_mask);
419 } else {
420 spe_andc(f, depth_fail, stencil_pass, depth_mask);
421 }
422
423 emit_stencil_op(f, face_stencil, stencil_src, depth_fail,
424 dsa->stencil[face].zfail_op,
425 sr->ref_value[face]);
426 stencil_src = face_stencil;
427 }
428
429 if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
430 && (dsa->stencil[face].zpass_op != PIPE_STENCIL_OP_KEEP)) {
431 emit_stencil_op(f, face_stencil, stencil_src, depth_pass,
432 dsa->stencil[face].zpass_op,
433 sr->ref_value[face]);
434 stencil_src = face_stencil;
435 }
436 }
437
438 spe_release_register(f, stencil_fail);
439 spe_release_register(f, depth_fail);
440 spe_release_register(f, stencil_mask);
441 if (stencil_pass != mask) {
442 spe_release_register(f, stencil_pass);
443 }
444
445 /* If all of the stencil operations were KEEP or the stencil write mask was
446 * zero, "stencil_src" will still be set to "stencil". In this case
447 * release the "face_stencil" register. Otherwise apply the stencil write
448 * mask to select bits from the calculated stencil value and the previous
449 * stencil value.
450 */
451 if (stencil_src == stencil) {
452 spe_release_register(f, face_stencil);
453 } else if (dsa->stencil[face].writemask != 0x0ff) {
454 int tmp = spe_allocate_available_register(f);
455
456 spe_il(f, tmp, dsa->stencil[face].writemask);
457 spe_selb(f, stencil_src, stencil, stencil_src, tmp);
458
459 spe_release_register(f, tmp);
460 }
461
462 return stencil_src;
463 }
464
465
466 void
467 cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa,
468 struct pipe_stencil_ref *sr)
469 {
470 struct pipe_depth_stencil_alpha_state *const dsa = &cdsa->base;
471 struct spe_function *const f = &cdsa->code;
472
473 /* This code generates a maximum of 6 (alpha test) + 3 (depth test)
474 * + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions. Round
475 * up to 64 to make it a happy power-of-two.
476 */
477 spe_init_func(f, SPE_INST_SIZE * 64);
478
479
480 /* Allocate registers for the function's input parameters. Cleverly (and
481 * clever code is usually dangerous, but I couldn't resist) the generated
482 * function returns a structure. Returned structures start with register
483 * 3, and the structure fields are ordered to match up exactly with the
484 * input parameters.
485 */
486 int mask = spe_allocate_register(f, 3);
487 int depth = spe_allocate_register(f, 4);
488 int stencil = spe_allocate_register(f, 5);
489 int zvals = spe_allocate_register(f, 6);
490 int frag_a = spe_allocate_register(f, 7);
491 int facing = spe_allocate_register(f, 8);
492
493 int depth_mask = spe_allocate_available_register(f);
494
495 boolean depth_complement;
496
497
498 emit_alpha_test(dsa, f, mask, frag_a);
499
500 depth_complement = emit_depth_test(dsa, f, depth_mask, depth, zvals);
501
502 if (dsa->stencil[0].enabled) {
503 const int front_depth_pass = spe_allocate_available_register(f);
504 int front_stencil = emit_stencil_test(dsa, sr, 0, f, mask,
505 depth_mask, depth_complement,
506 stencil, front_depth_pass);
507
508 if (dsa->stencil[1].enabled) {
509 const int back_depth_pass = spe_allocate_available_register(f);
510 int back_stencil = emit_stencil_test(dsa, sr, 1, f, mask,
511 depth_mask, depth_complement,
512 stencil, back_depth_pass);
513
514 /* If the front facing stencil value and the back facing stencil
515 * value are stored in the same register, there is no need to select
516 * a value based on the facing. This can happen if the stencil value
517 * was not modified due to the write masks being zero, the stencil
518 * operations being KEEP, etc.
519 */
520 if (front_stencil != back_stencil) {
521 spe_selb(f, stencil, back_stencil, front_stencil, facing);
522 }
523
524 if (back_stencil != stencil) {
525 spe_release_register(f, back_stencil);
526 }
527
528 if (front_stencil != stencil) {
529 spe_release_register(f, front_stencil);
530 }
531
532 spe_selb(f, mask, back_depth_pass, front_depth_pass, facing);
533
534 spe_release_register(f, back_depth_pass);
535 } else {
536 if (front_stencil != stencil) {
537 spe_or(f, stencil, front_stencil, front_stencil);
538 spe_release_register(f, front_stencil);
539 }
540 spe_or(f, mask, front_depth_pass, front_depth_pass);
541 }
542
543 spe_release_register(f, front_depth_pass);
544 } else if (dsa->depth.enabled) {
545 if (depth_complement) {
546 spe_andc(f, mask, mask, depth_mask);
547 } else {
548 spe_and(f, mask, mask, depth_mask);
549 }
550 }
551
552 if (dsa->depth.writemask) {
553 spe_selb(f, depth, depth, zvals, mask);
554 }
555
556 spe_bi(f, 0, 0, 0); /* return from function call */
557
558
559 #if 0
560 {
561 const uint32_t *p = f->store;
562 unsigned i;
563
564 printf("# alpha (%sabled)\n",
565 (dsa->alpha.enabled) ? "en" : "dis");
566 printf("# func: %u\n", dsa->alpha.func);
567 printf("# ref: %.2f\n", dsa->alpha.ref);
568
569 printf("# depth (%sabled)\n",
570 (dsa->depth.enabled) ? "en" : "dis");
571 printf("# func: %u\n", dsa->depth.func);
572
573 for (i = 0; i < 2; i++) {
574 printf("# %s stencil (%sabled)\n",
575 (i == 0) ? "front" : "back",
576 (dsa->stencil[i].enabled) ? "en" : "dis");
577
578 printf("# func: %u\n", dsa->stencil[i].func);
579 printf("# op (sf, zf, zp): %u %u %u\n",
580 dsa->stencil[i].fail_op,
581 dsa->stencil[i].zfail_op,
582 dsa->stencil[i].zpass_op);
583 printf("# ref value / value mask / write mask: %02x %02x %02x\n",
584 sr->ref_value[i],
585 dsa->stencil[i].valuemask,
586 dsa->stencil[i].writemask);
587 }
588
589 printf("\t.text\n");
590 for (/* empty */; p < f->csr; p++) {
591 printf("\t.long\t0x%04x\n", *p);
592 }
593 fflush(stdout);
594 }
595 #endif
596 }
597
598
599 /**
600 * \note Emits a maximum of 3 instructions
601 */
602 static int
603 emit_alpha_factor_calculation(struct spe_function *f,
604 unsigned factor,
605 int src_alpha, int dst_alpha, int const_alpha)
606 {
607 int factor_reg;
608 int tmp;
609
610
611 switch (factor) {
612 case PIPE_BLENDFACTOR_ONE:
613 factor_reg = -1;
614 break;
615
616 case PIPE_BLENDFACTOR_SRC_ALPHA:
617 factor_reg = spe_allocate_available_register(f);
618
619 spe_or(f, factor_reg, src_alpha, src_alpha);
620 break;
621
622 case PIPE_BLENDFACTOR_DST_ALPHA:
623 factor_reg = dst_alpha;
624 break;
625
626 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
627 factor_reg = -1;
628 break;
629
630 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
631 factor_reg = spe_allocate_available_register(f);
632
633 tmp = spe_allocate_available_register(f);
634 spe_il(f, tmp, 1);
635 spe_cuflt(f, tmp, tmp, 0);
636 spe_fs(f, factor_reg, tmp, const_alpha);
637 spe_release_register(f, tmp);
638 break;
639
640 case PIPE_BLENDFACTOR_CONST_ALPHA:
641 factor_reg = const_alpha;
642 break;
643
644 case PIPE_BLENDFACTOR_ZERO:
645 factor_reg = -1;
646 break;
647
648 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
649 tmp = spe_allocate_available_register(f);
650 factor_reg = spe_allocate_available_register(f);
651
652 spe_il(f, tmp, 1);
653 spe_cuflt(f, tmp, tmp, 0);
654 spe_fs(f, factor_reg, tmp, src_alpha);
655
656 spe_release_register(f, tmp);
657 break;
658
659 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
660 tmp = spe_allocate_available_register(f);
661 factor_reg = spe_allocate_available_register(f);
662
663 spe_il(f, tmp, 1);
664 spe_cuflt(f, tmp, tmp, 0);
665 spe_fs(f, factor_reg, tmp, dst_alpha);
666
667 spe_release_register(f, tmp);
668 break;
669
670 case PIPE_BLENDFACTOR_SRC1_ALPHA:
671 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
672 default:
673 assert(0);
674 factor_reg = -1;
675 break;
676 }
677
678 return factor_reg;
679 }
680
681
682 /**
683 * \note Emits a maximum of 6 instructions
684 */
685 static void
686 emit_color_factor_calculation(struct spe_function *f,
687 unsigned sF, unsigned mask,
688 const int *src,
689 const int *dst,
690 const int *const_color,
691 int *factor)
692 {
693 int tmp;
694 unsigned i;
695
696
697 factor[0] = -1;
698 factor[1] = -1;
699 factor[2] = -1;
700 factor[3] = -1;
701
702 switch (sF) {
703 case PIPE_BLENDFACTOR_ONE:
704 break;
705
706 case PIPE_BLENDFACTOR_SRC_COLOR:
707 for (i = 0; i < 3; ++i) {
708 if ((mask & (1U << i)) != 0) {
709 factor[i] = spe_allocate_available_register(f);
710 spe_or(f, factor[i], src[i], src[i]);
711 }
712 }
713 break;
714
715 case PIPE_BLENDFACTOR_SRC_ALPHA:
716 factor[0] = spe_allocate_available_register(f);
717 factor[1] = factor[0];
718 factor[2] = factor[0];
719
720 spe_or(f, factor[0], src[3], src[3]);
721 break;
722
723 case PIPE_BLENDFACTOR_DST_ALPHA:
724 factor[0] = dst[3];
725 factor[1] = dst[3];
726 factor[2] = dst[3];
727 break;
728
729 case PIPE_BLENDFACTOR_DST_COLOR:
730 factor[0] = dst[0];
731 factor[1] = dst[1];
732 factor[2] = dst[2];
733 break;
734
735 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
736 tmp = spe_allocate_available_register(f);
737 factor[0] = spe_allocate_available_register(f);
738 factor[1] = factor[0];
739 factor[2] = factor[0];
740
741 /* Alpha saturate means min(As, 1-Ad).
742 */
743 spe_il(f, tmp, 1);
744 spe_cuflt(f, tmp, tmp, 0);
745 spe_fs(f, tmp, tmp, dst[3]);
746 spe_fcgt(f, factor[0], tmp, src[3]);
747 spe_selb(f, factor[0], src[3], tmp, factor[0]);
748
749 spe_release_register(f, tmp);
750 break;
751
752 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
753 tmp = spe_allocate_available_register(f);
754 spe_il(f, tmp, 1);
755 spe_cuflt(f, tmp, tmp, 0);
756
757 for (i = 0; i < 3; i++) {
758 factor[i] = spe_allocate_available_register(f);
759
760 spe_fs(f, factor[i], tmp, const_color[i]);
761 }
762 spe_release_register(f, tmp);
763 break;
764
765 case PIPE_BLENDFACTOR_CONST_COLOR:
766 for (i = 0; i < 3; i++) {
767 factor[i] = const_color[i];
768 }
769 break;
770
771 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
772 factor[0] = spe_allocate_available_register(f);
773 factor[1] = factor[0];
774 factor[2] = factor[0];
775
776 tmp = spe_allocate_available_register(f);
777 spe_il(f, tmp, 1);
778 spe_cuflt(f, tmp, tmp, 0);
779 spe_fs(f, factor[0], tmp, const_color[3]);
780 spe_release_register(f, tmp);
781 break;
782
783 case PIPE_BLENDFACTOR_CONST_ALPHA:
784 factor[0] = const_color[3];
785 factor[1] = factor[0];
786 factor[2] = factor[0];
787 break;
788
789 case PIPE_BLENDFACTOR_ZERO:
790 break;
791
792 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
793 tmp = spe_allocate_available_register(f);
794
795 spe_il(f, tmp, 1);
796 spe_cuflt(f, tmp, tmp, 0);
797
798 for (i = 0; i < 3; ++i) {
799 if ((mask & (1U << i)) != 0) {
800 factor[i] = spe_allocate_available_register(f);
801 spe_fs(f, factor[i], tmp, src[i]);
802 }
803 }
804
805 spe_release_register(f, tmp);
806 break;
807
808 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
809 tmp = spe_allocate_available_register(f);
810 factor[0] = spe_allocate_available_register(f);
811 factor[1] = factor[0];
812 factor[2] = factor[0];
813
814 spe_il(f, tmp, 1);
815 spe_cuflt(f, tmp, tmp, 0);
816 spe_fs(f, factor[0], tmp, src[3]);
817
818 spe_release_register(f, tmp);
819 break;
820
821 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
822 tmp = spe_allocate_available_register(f);
823 factor[0] = spe_allocate_available_register(f);
824 factor[1] = factor[0];
825 factor[2] = factor[0];
826
827 spe_il(f, tmp, 1);
828 spe_cuflt(f, tmp, tmp, 0);
829 spe_fs(f, factor[0], tmp, dst[3]);
830
831 spe_release_register(f, tmp);
832 break;
833
834 case PIPE_BLENDFACTOR_INV_DST_COLOR:
835 tmp = spe_allocate_available_register(f);
836
837 spe_il(f, tmp, 1);
838 spe_cuflt(f, tmp, tmp, 0);
839
840 for (i = 0; i < 3; ++i) {
841 if ((mask & (1U << i)) != 0) {
842 factor[i] = spe_allocate_available_register(f);
843 spe_fs(f, factor[i], tmp, dst[i]);
844 }
845 }
846
847 spe_release_register(f, tmp);
848 break;
849
850 case PIPE_BLENDFACTOR_SRC1_COLOR:
851 case PIPE_BLENDFACTOR_SRC1_ALPHA:
852 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
853 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
854 default:
855 assert(0);
856 }
857 }
858
859
860 static void
861 emit_blend_calculation(struct spe_function *f,
862 unsigned func, unsigned sF, unsigned dF,
863 int src, int src_factor, int dst, int dst_factor)
864 {
865 int tmp = spe_allocate_available_register(f);
866
867 switch (func) {
868 case PIPE_BLEND_ADD:
869 if (sF == PIPE_BLENDFACTOR_ONE) {
870 if (dF == PIPE_BLENDFACTOR_ZERO) {
871 /* Do nothing. */
872 } else if (dF == PIPE_BLENDFACTOR_ONE) {
873 spe_fa(f, src, src, dst);
874 }
875 } else if (sF == PIPE_BLENDFACTOR_ZERO) {
876 if (dF == PIPE_BLENDFACTOR_ZERO) {
877 spe_il(f, src, 0);
878 } else if (dF == PIPE_BLENDFACTOR_ONE) {
879 spe_or(f, src, dst, dst);
880 } else {
881 spe_fm(f, src, dst, dst_factor);
882 }
883 } else if (dF == PIPE_BLENDFACTOR_ZERO) {
884 spe_fm(f, src, src, src_factor);
885 } else {
886 spe_fm(f, tmp, dst, dst_factor);
887 spe_fma(f, src, src, src_factor, tmp);
888 }
889 break;
890
891 case PIPE_BLEND_SUBTRACT:
892 if (sF == PIPE_BLENDFACTOR_ONE) {
893 if (dF == PIPE_BLENDFACTOR_ZERO) {
894 /* Do nothing. */
895 } else if (dF == PIPE_BLENDFACTOR_ONE) {
896 spe_fs(f, src, src, dst);
897 }
898 } else if (sF == PIPE_BLENDFACTOR_ZERO) {
899 if (dF == PIPE_BLENDFACTOR_ZERO) {
900 spe_il(f, src, 0);
901 } else if (dF == PIPE_BLENDFACTOR_ONE) {
902 spe_il(f, tmp, 0);
903 spe_fs(f, src, tmp, dst);
904 } else {
905 spe_fm(f, src, dst, dst_factor);
906 }
907 } else if (dF == PIPE_BLENDFACTOR_ZERO) {
908 spe_fm(f, src, src, src_factor);
909 } else {
910 spe_fm(f, tmp, dst, dst_factor);
911 spe_fms(f, src, src, src_factor, tmp);
912 }
913 break;
914
915 case PIPE_BLEND_REVERSE_SUBTRACT:
916 if (sF == PIPE_BLENDFACTOR_ONE) {
917 if (dF == PIPE_BLENDFACTOR_ZERO) {
918 spe_il(f, tmp, 0);
919 spe_fs(f, src, tmp, src);
920 } else if (dF == PIPE_BLENDFACTOR_ONE) {
921 spe_fs(f, src, dst, src);
922 }
923 } else if (sF == PIPE_BLENDFACTOR_ZERO) {
924 if (dF == PIPE_BLENDFACTOR_ZERO) {
925 spe_il(f, src, 0);
926 } else if (dF == PIPE_BLENDFACTOR_ONE) {
927 spe_or(f, src, dst, dst);
928 } else {
929 spe_fm(f, src, dst, dst_factor);
930 }
931 } else if (dF == PIPE_BLENDFACTOR_ZERO) {
932 spe_fm(f, src, src, src_factor);
933 } else {
934 spe_fm(f, tmp, src, src_factor);
935 spe_fms(f, src, src, dst_factor, tmp);
936 }
937 break;
938
939 case PIPE_BLEND_MIN:
940 spe_cgt(f, tmp, src, dst);
941 spe_selb(f, src, src, dst, tmp);
942 break;
943
944 case PIPE_BLEND_MAX:
945 spe_cgt(f, tmp, src, dst);
946 spe_selb(f, src, dst, src, tmp);
947 break;
948
949 default:
950 assert(0);
951 }
952
953 spe_release_register(f, tmp);
954 }
955
956
957 /**
958 * Generate code to perform alpha blending on the SPE
959 */
960 void
961 cell_generate_alpha_blend(struct cell_blend_state *cb)
962 {
963 struct pipe_blend_state *const b = &cb->base;
964 struct spe_function *const f = &cb->code;
965
966 /* This code generates a maximum of 3 (source alpha factor)
967 * + 3 (destination alpha factor) + (3 * 6) (source color factor)
968 * + (3 * 6) (destination color factor) + (4 * 2) (blend equation)
969 * + 4 (fragment mask) + 1 (return) = 55 instlructions. Round up to 64 to
970 * make it a happy power-of-two.
971 */
972 spe_init_func(f, SPE_INST_SIZE * 64);
973
974
975 const int frag[4] = {
976 spe_allocate_register(f, 3),
977 spe_allocate_register(f, 4),
978 spe_allocate_register(f, 5),
979 spe_allocate_register(f, 6),
980 };
981 const int pixel[4] = {
982 spe_allocate_register(f, 7),
983 spe_allocate_register(f, 8),
984 spe_allocate_register(f, 9),
985 spe_allocate_register(f, 10),
986 };
987 const int const_color[4] = {
988 spe_allocate_register(f, 11),
989 spe_allocate_register(f, 12),
990 spe_allocate_register(f, 13),
991 spe_allocate_register(f, 14),
992 };
993 unsigned func[4];
994 unsigned sF[4];
995 unsigned dF[4];
996 unsigned i;
997 int src_factor[4];
998 int dst_factor[4];
999
1000
1001 /* Does the selected blend mode make use of the source / destination
1002 * color (RGB) blend factors?
1003 */
1004 boolean need_color_factor = b->rt[0].blend_enable
1005 && (b->rt[0].rgb_func != PIPE_BLEND_MIN)
1006 && (b->rt[0].rgb_func != PIPE_BLEND_MAX);
1007
1008 /* Does the selected blend mode make use of the source / destination
1009 * alpha blend factors?
1010 */
1011 boolean need_alpha_factor = b->rt[0].blend_enable
1012 && (b->rt[0].alpha_func != PIPE_BLEND_MIN)
1013 && (b->rt[0].alpha_func != PIPE_BLEND_MAX);
1014
1015
1016 if (b->rt[0].blend_enable) {
1017 sF[0] = b->rt[0].rgb_src_factor;
1018 sF[1] = sF[0];
1019 sF[2] = sF[0];
1020 switch (b->rt[0].alpha_src_factor & 0x0f) {
1021 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
1022 sF[3] = PIPE_BLENDFACTOR_ONE;
1023 break;
1024 case PIPE_BLENDFACTOR_SRC_COLOR:
1025 case PIPE_BLENDFACTOR_DST_COLOR:
1026 case PIPE_BLENDFACTOR_CONST_COLOR:
1027 case PIPE_BLENDFACTOR_SRC1_COLOR:
1028 sF[3] = b->rt[0].alpha_src_factor + 1;
1029 break;
1030 default:
1031 sF[3] = b->rt[0].alpha_src_factor;
1032 }
1033
1034 dF[0] = b->rt[0].rgb_dst_factor;
1035 dF[1] = dF[0];
1036 dF[2] = dF[0];
1037 switch (b->rt[0].alpha_dst_factor & 0x0f) {
1038 case PIPE_BLENDFACTOR_SRC_COLOR:
1039 case PIPE_BLENDFACTOR_DST_COLOR:
1040 case PIPE_BLENDFACTOR_CONST_COLOR:
1041 case PIPE_BLENDFACTOR_SRC1_COLOR:
1042 dF[3] = b->rt[0].alpha_dst_factor + 1;
1043 break;
1044 default:
1045 dF[3] = b->rt[0].alpha_dst_factor;
1046 }
1047
1048 func[0] = b->rt[0].rgb_func;
1049 func[1] = func[0];
1050 func[2] = func[0];
1051 func[3] = b->rt[0].alpha_func;
1052 } else {
1053 sF[0] = PIPE_BLENDFACTOR_ONE;
1054 sF[1] = PIPE_BLENDFACTOR_ONE;
1055 sF[2] = PIPE_BLENDFACTOR_ONE;
1056 sF[3] = PIPE_BLENDFACTOR_ONE;
1057 dF[0] = PIPE_BLENDFACTOR_ZERO;
1058 dF[1] = PIPE_BLENDFACTOR_ZERO;
1059 dF[2] = PIPE_BLENDFACTOR_ZERO;
1060 dF[3] = PIPE_BLENDFACTOR_ZERO;
1061
1062 func[0] = PIPE_BLEND_ADD;
1063 func[1] = PIPE_BLEND_ADD;
1064 func[2] = PIPE_BLEND_ADD;
1065 func[3] = PIPE_BLEND_ADD;
1066 }
1067
1068
1069 /* If alpha writing is enabled and the alpha blend mode requires use of
1070 * the alpha factor, calculate the alpha factor.
1071 */
1072 if (((b->rt[0].colormask & 8) != 0) && need_alpha_factor) {
1073 src_factor[3] = emit_alpha_factor_calculation(f, sF[3], const_color[3],
1074 frag[3], pixel[3]);
1075
1076 /* If the alpha destination blend factor is the same as the alpha source
1077 * blend factor, re-use the previously calculated value.
1078 */
1079 dst_factor[3] = (dF[3] == sF[3])
1080 ? src_factor[3]
1081 : emit_alpha_factor_calculation(f, dF[3], const_color[3],
1082 frag[3], pixel[3]);
1083 }
1084
1085
1086 if (sF[0] == sF[3]) {
1087 src_factor[0] = src_factor[3];
1088 src_factor[1] = src_factor[3];
1089 src_factor[2] = src_factor[3];
1090 } else if (sF[0] == dF[3]) {
1091 src_factor[0] = dst_factor[3];
1092 src_factor[1] = dst_factor[3];
1093 src_factor[2] = dst_factor[3];
1094 } else if (need_color_factor) {
1095 emit_color_factor_calculation(f,
1096 b->rt[0].rgb_src_factor,
1097 b->rt[0].colormask,
1098 frag, pixel, const_color, src_factor);
1099 }
1100
1101
1102 if (dF[0] == sF[3]) {
1103 dst_factor[0] = src_factor[3];
1104 dst_factor[1] = src_factor[3];
1105 dst_factor[2] = src_factor[3];
1106 } else if (dF[0] == dF[3]) {
1107 dst_factor[0] = dst_factor[3];
1108 dst_factor[1] = dst_factor[3];
1109 dst_factor[2] = dst_factor[3];
1110 } else if (dF[0] == sF[0]) {
1111 dst_factor[0] = src_factor[0];
1112 dst_factor[1] = src_factor[1];
1113 dst_factor[2] = src_factor[2];
1114 } else if (need_color_factor) {
1115 emit_color_factor_calculation(f,
1116 b->rt[0].rgb_dst_factor,
1117 b->rt[0].colormask,
1118 frag, pixel, const_color, dst_factor);
1119 }
1120
1121
1122
1123 for (i = 0; i < 4; ++i) {
1124 if ((b->rt[0].colormask & (1U << i)) != 0) {
1125 emit_blend_calculation(f,
1126 func[i], sF[i], dF[i],
1127 frag[i], src_factor[i],
1128 pixel[i], dst_factor[i]);
1129 }
1130 }
1131
1132 spe_bi(f, 0, 0, 0);
1133
1134 #if 0
1135 {
1136 const uint32_t *p = f->store;
1137
1138 printf("# %u instructions\n", f->csr - f->store);
1139 printf("# blend (%sabled)\n",
1140 (cb->base.blend_enable) ? "en" : "dis");
1141 printf("# RGB func / sf / df: %u %u %u\n",
1142 cb->base.rgb_func,
1143 cb->base.rgb_src_factor,
1144 cb->base.rgb_dst_factor);
1145 printf("# ALP func / sf / df: %u %u %u\n",
1146 cb->base.alpha_func,
1147 cb->base.alpha_src_factor,
1148 cb->base.alpha_dst_factor);
1149
1150 printf("\t.text\n");
1151 for (/* empty */; p < f->csr; p++) {
1152 printf("\t.long\t0x%04x\n", *p);
1153 }
1154 fflush(stdout);
1155 }
1156 #endif
1157 }
1158
1159
1160 static int
1161 PC_OFFSET(const struct spe_function *f, const void *d)
1162 {
1163 const intptr_t pc = (intptr_t) &f->store[f->num_inst];
1164 const intptr_t ea = ~0x0f & (intptr_t) d;
1165
1166 return (ea - pc) >> 2;
1167 }
1168
1169
1170 /**
1171 * Generate code to perform color conversion and logic op
1172 *
1173 * \bug
1174 * The code generated by this function should also perform dithering.
1175 *
1176 * \bug
1177 * The code generated by this function should also perform color-write
1178 * masking.
1179 *
1180 * \bug
1181 * Only two framebuffer formats are supported at this time.
1182 */
1183 void
1184 cell_generate_logic_op(struct spe_function *f,
1185 const struct pipe_blend_state *blend,
1186 struct pipe_surface *surf)
1187 {
1188 const unsigned logic_op = (blend->logicop_enable)
1189 ? blend->logicop_func : PIPE_LOGICOP_COPY;
1190
1191 /* This code generates a maximum of 37 instructions. An additional 32
1192 * bytes (equiv. to 8 instructions) are needed for data storage. Round up
1193 * to 64 to make it a happy power-of-two.
1194 */
1195 spe_init_func(f, SPE_INST_SIZE * 64);
1196
1197
1198 /* Pixel colors in framebuffer format in AoS layout.
1199 */
1200 const int pixel[4] = {
1201 spe_allocate_register(f, 3),
1202 spe_allocate_register(f, 4),
1203 spe_allocate_register(f, 5),
1204 spe_allocate_register(f, 6),
1205 };
1206
1207 /* Fragment colors stored as floats in SoA layout.
1208 */
1209 const int frag[4] = {
1210 spe_allocate_register(f, 7),
1211 spe_allocate_register(f, 8),
1212 spe_allocate_register(f, 9),
1213 spe_allocate_register(f, 10),
1214 };
1215
1216 const int mask = spe_allocate_register(f, 11);
1217
1218
1219 /* Short-circuit the noop and invert cases.
1220 */
1221 if ((logic_op == PIPE_LOGICOP_NOOP) || (blend->rt[0].colormask == 0)) {
1222 spe_bi(f, 0, 0, 0);
1223 return;
1224 } else if (logic_op == PIPE_LOGICOP_INVERT) {
1225 spe_nor(f, pixel[0], pixel[0], pixel[0]);
1226 spe_nor(f, pixel[1], pixel[1], pixel[1]);
1227 spe_nor(f, pixel[2], pixel[2], pixel[2]);
1228 spe_nor(f, pixel[3], pixel[3], pixel[3]);
1229 spe_bi(f, 0, 0, 0);
1230 return;
1231 }
1232
1233
1234 const int tmp[4] = {
1235 spe_allocate_available_register(f),
1236 spe_allocate_available_register(f),
1237 spe_allocate_available_register(f),
1238 spe_allocate_available_register(f),
1239 };
1240
1241 const int shuf_xpose_hi = spe_allocate_available_register(f);
1242 const int shuf_xpose_lo = spe_allocate_available_register(f);
1243 const int shuf_color = spe_allocate_available_register(f);
1244
1245
1246 /* Pointer to the begining of the function's private data area.
1247 */
1248 uint32_t *const data = ((uint32_t *) f->store) + (64 - 8);
1249
1250
1251 /* Convert fragment colors to framebuffer format in AoS layout.
1252 */
1253 switch (surf->format) {
1254 case PIPE_FORMAT_B8G8R8A8_UNORM:
1255 data[0] = 0x00010203;
1256 data[1] = 0x10111213;
1257 data[2] = 0x04050607;
1258 data[3] = 0x14151617;
1259 data[4] = 0x0c000408;
1260 data[5] = 0x80808080;
1261 data[6] = 0x80808080;
1262 data[7] = 0x80808080;
1263 break;
1264 case PIPE_FORMAT_A8R8G8B8_UNORM:
1265 data[0] = 0x03020100;
1266 data[1] = 0x13121110;
1267 data[2] = 0x07060504;
1268 data[3] = 0x17161514;
1269 data[4] = 0x0804000c;
1270 data[5] = 0x80808080;
1271 data[6] = 0x80808080;
1272 data[7] = 0x80808080;
1273 break;
1274 default:
1275 fprintf(stderr, "CELL: Bad pixel format in cell_generate_logic_op()");
1276 ASSERT(0);
1277 }
1278
1279 spe_ilh(f, tmp[0], 0x0808);
1280 spe_lqr(f, shuf_xpose_hi, PC_OFFSET(f, data+0));
1281 spe_lqr(f, shuf_color, PC_OFFSET(f, data+4));
1282 spe_a(f, shuf_xpose_lo, shuf_xpose_hi, tmp[0]);
1283
1284 spe_shufb(f, tmp[0], frag[0], frag[2], shuf_xpose_hi);
1285 spe_shufb(f, tmp[1], frag[0], frag[2], shuf_xpose_lo);
1286 spe_shufb(f, tmp[2], frag[1], frag[3], shuf_xpose_hi);
1287 spe_shufb(f, tmp[3], frag[1], frag[3], shuf_xpose_lo);
1288
1289 spe_shufb(f, frag[0], tmp[0], tmp[2], shuf_xpose_hi);
1290 spe_shufb(f, frag[1], tmp[0], tmp[2], shuf_xpose_lo);
1291 spe_shufb(f, frag[2], tmp[1], tmp[3], shuf_xpose_hi);
1292 spe_shufb(f, frag[3], tmp[1], tmp[3], shuf_xpose_lo);
1293
1294 spe_cfltu(f, frag[0], frag[0], 32);
1295 spe_cfltu(f, frag[1], frag[1], 32);
1296 spe_cfltu(f, frag[2], frag[2], 32);
1297 spe_cfltu(f, frag[3], frag[3], 32);
1298
1299 spe_shufb(f, frag[0], frag[0], pixel[0], shuf_color);
1300 spe_shufb(f, frag[1], frag[1], pixel[1], shuf_color);
1301 spe_shufb(f, frag[2], frag[2], pixel[2], shuf_color);
1302 spe_shufb(f, frag[3], frag[3], pixel[3], shuf_color);
1303
1304
1305 /* If logic op is enabled, perform the requested logical operation on the
1306 * converted fragment colors and the pixel colors.
1307 */
1308 switch (logic_op) {
1309 case PIPE_LOGICOP_CLEAR:
1310 spe_il(f, frag[0], 0);
1311 spe_il(f, frag[1], 0);
1312 spe_il(f, frag[2], 0);
1313 spe_il(f, frag[3], 0);
1314 break;
1315 case PIPE_LOGICOP_NOR:
1316 spe_nor(f, frag[0], frag[0], pixel[0]);
1317 spe_nor(f, frag[1], frag[1], pixel[1]);
1318 spe_nor(f, frag[2], frag[2], pixel[2]);
1319 spe_nor(f, frag[3], frag[3], pixel[3]);
1320 break;
1321 case PIPE_LOGICOP_AND_INVERTED:
1322 spe_andc(f, frag[0], pixel[0], frag[0]);
1323 spe_andc(f, frag[1], pixel[1], frag[1]);
1324 spe_andc(f, frag[2], pixel[2], frag[2]);
1325 spe_andc(f, frag[3], pixel[3], frag[3]);
1326 break;
1327 case PIPE_LOGICOP_COPY_INVERTED:
1328 spe_nor(f, frag[0], frag[0], frag[0]);
1329 spe_nor(f, frag[1], frag[1], frag[1]);
1330 spe_nor(f, frag[2], frag[2], frag[2]);
1331 spe_nor(f, frag[3], frag[3], frag[3]);
1332 break;
1333 case PIPE_LOGICOP_AND_REVERSE:
1334 spe_andc(f, frag[0], frag[0], pixel[0]);
1335 spe_andc(f, frag[1], frag[1], pixel[1]);
1336 spe_andc(f, frag[2], frag[2], pixel[2]);
1337 spe_andc(f, frag[3], frag[3], pixel[3]);
1338 break;
1339 case PIPE_LOGICOP_XOR:
1340 spe_xor(f, frag[0], frag[0], pixel[0]);
1341 spe_xor(f, frag[1], frag[1], pixel[1]);
1342 spe_xor(f, frag[2], frag[2], pixel[2]);
1343 spe_xor(f, frag[3], frag[3], pixel[3]);
1344 break;
1345 case PIPE_LOGICOP_NAND:
1346 spe_nand(f, frag[0], frag[0], pixel[0]);
1347 spe_nand(f, frag[1], frag[1], pixel[1]);
1348 spe_nand(f, frag[2], frag[2], pixel[2]);
1349 spe_nand(f, frag[3], frag[3], pixel[3]);
1350 break;
1351 case PIPE_LOGICOP_AND:
1352 spe_and(f, frag[0], frag[0], pixel[0]);
1353 spe_and(f, frag[1], frag[1], pixel[1]);
1354 spe_and(f, frag[2], frag[2], pixel[2]);
1355 spe_and(f, frag[3], frag[3], pixel[3]);
1356 break;
1357 case PIPE_LOGICOP_EQUIV:
1358 spe_eqv(f, frag[0], frag[0], pixel[0]);
1359 spe_eqv(f, frag[1], frag[1], pixel[1]);
1360 spe_eqv(f, frag[2], frag[2], pixel[2]);
1361 spe_eqv(f, frag[3], frag[3], pixel[3]);
1362 break;
1363 case PIPE_LOGICOP_OR_INVERTED:
1364 spe_orc(f, frag[0], pixel[0], frag[0]);
1365 spe_orc(f, frag[1], pixel[1], frag[1]);
1366 spe_orc(f, frag[2], pixel[2], frag[2]);
1367 spe_orc(f, frag[3], pixel[3], frag[3]);
1368 break;
1369 case PIPE_LOGICOP_COPY:
1370 break;
1371 case PIPE_LOGICOP_OR_REVERSE:
1372 spe_orc(f, frag[0], frag[0], pixel[0]);
1373 spe_orc(f, frag[1], frag[1], pixel[1]);
1374 spe_orc(f, frag[2], frag[2], pixel[2]);
1375 spe_orc(f, frag[3], frag[3], pixel[3]);
1376 break;
1377 case PIPE_LOGICOP_OR:
1378 spe_or(f, frag[0], frag[0], pixel[0]);
1379 spe_or(f, frag[1], frag[1], pixel[1]);
1380 spe_or(f, frag[2], frag[2], pixel[2]);
1381 spe_or(f, frag[3], frag[3], pixel[3]);
1382 break;
1383 case PIPE_LOGICOP_SET:
1384 spe_il(f, frag[0], ~0);
1385 spe_il(f, frag[1], ~0);
1386 spe_il(f, frag[2], ~0);
1387 spe_il(f, frag[3], ~0);
1388 break;
1389
1390 /* These two cases are short-circuited above.
1391 */
1392 case PIPE_LOGICOP_INVERT:
1393 case PIPE_LOGICOP_NOOP:
1394 default:
1395 assert(0);
1396 }
1397
1398
1399 /* Apply fragment mask.
1400 */
1401 spe_ilh(f, tmp[0], 0x0000);
1402 spe_ilh(f, tmp[1], 0x0404);
1403 spe_ilh(f, tmp[2], 0x0808);
1404 spe_ilh(f, tmp[3], 0x0c0c);
1405
1406 spe_shufb(f, tmp[0], mask, mask, tmp[0]);
1407 spe_shufb(f, tmp[1], mask, mask, tmp[1]);
1408 spe_shufb(f, tmp[2], mask, mask, tmp[2]);
1409 spe_shufb(f, tmp[3], mask, mask, tmp[3]);
1410
1411 spe_selb(f, pixel[0], pixel[0], frag[0], tmp[0]);
1412 spe_selb(f, pixel[1], pixel[1], frag[1], tmp[1]);
1413 spe_selb(f, pixel[2], pixel[2], frag[2], tmp[2]);
1414 spe_selb(f, pixel[3], pixel[3], frag[3], tmp[3]);
1415
1416 spe_bi(f, 0, 0, 0);
1417
1418 #if 0
1419 {
1420 const uint32_t *p = f->store;
1421 unsigned i;
1422
1423 printf("# %u instructions\n", f->csr - f->store);
1424
1425 printf("\t.text\n");
1426 for (i = 0; i < 64; i++) {
1427 printf("\t.long\t0x%04x\n", p[i]);
1428 }
1429 fflush(stdout);
1430 }
1431 #endif
1432 }