gallium: standardize naming of masks
[mesa.git] / src / gallium / drivers / cell / ppu / cell_state_per_fragment.c
1 /*
2 * (C) Copyright IBM Corporation 2008
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file
27 * Generate code to perform all per-fragment operations.
28 *
29 * Code generated by these functions performs alpha, depth, and stencil
30 * testing as well as alpha blending.
31 *
32 * \note
33 * Occlusion query is not supported, but this is the right place to add that
34 * support.
35 *
36 * \author Ian Romanick <idr@us.ibm.com>
37 */
38
39 #include "pipe/p_defines.h"
40 #include "pipe/p_state.h"
41
42 #include "cell_context.h"
43
44 #include "rtasm/rtasm_ppc_spe.h"
45
46
47 /**
48 * Generate code to perform alpha testing.
49 *
50 * The code generated by this function uses the register specificed by
51 * \c mask as both an input and an output.
52 *
53 * \param dsa Current alpha-test state
54 * \param f Function to which code should be appended
55 * \param mask Index of register containing active fragment mask
56 * \param alphas Index of register containing per-fragment alpha values
57 *
58 * \note Emits a maximum of 6 instructions.
59 */
60 static void
61 emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa,
62 struct spe_function *f, int mask, int alphas)
63 {
64 /* If the alpha function is either NEVER or ALWAYS, there is no need to
65 * load the reference value into a register. ALWAYS is a fairly common
66 * case, and this optimization saves 2 instructions.
67 */
68 if (dsa->alpha.enabled
69 && (dsa->alpha.func != PIPE_FUNC_NEVER)
70 && (dsa->alpha.func != PIPE_FUNC_ALWAYS)) {
71 int ref = spe_allocate_available_register(f);
72 int tmp_a = spe_allocate_available_register(f);
73 int tmp_b = spe_allocate_available_register(f);
74 union {
75 float f;
76 unsigned u;
77 } ref_val;
78 boolean complement = FALSE;
79
80 ref_val.f = dsa->alpha.ref;
81
82 spe_il(f, ref, ref_val.u & 0x0000ffff);
83 spe_ilh(f, ref, ref_val.u >> 16);
84
85 switch (dsa->alpha.func) {
86 case PIPE_FUNC_NOTEQUAL:
87 complement = TRUE;
88 /* FALLTHROUGH */
89
90 case PIPE_FUNC_EQUAL:
91 spe_fceq(f, tmp_a, ref, alphas);
92 break;
93
94 case PIPE_FUNC_LEQUAL:
95 complement = TRUE;
96 /* FALLTHROUGH */
97
98 case PIPE_FUNC_GREATER:
99 spe_fcgt(f, tmp_a, ref, alphas);
100 break;
101
102 case PIPE_FUNC_LESS:
103 complement = TRUE;
104 /* FALLTHROUGH */
105
106 case PIPE_FUNC_GEQUAL:
107 spe_fcgt(f, tmp_a, ref, alphas);
108 spe_fceq(f, tmp_b, ref, alphas);
109 spe_or(f, tmp_a, tmp_b, tmp_a);
110 break;
111
112 case PIPE_FUNC_ALWAYS:
113 case PIPE_FUNC_NEVER:
114 default:
115 assert(0);
116 break;
117 }
118
119 if (complement) {
120 spe_andc(f, mask, mask, tmp_a);
121 } else {
122 spe_and(f, mask, mask, tmp_a);
123 }
124
125 spe_release_register(f, ref);
126 spe_release_register(f, tmp_a);
127 spe_release_register(f, tmp_b);
128 } else if (dsa->alpha.enabled && (dsa->alpha.func == PIPE_FUNC_NEVER)) {
129 spe_il(f, mask, 0);
130 }
131 }
132
133
134 /**
135 * Generate code to perform Z testing. Four Z values are tested at once.
136 * \param dsa Current depth-test state
137 * \param f Function to which code should be appended
138 * \param mask Index of register to contain depth-pass mask
139 * \param stored Index of register containing values from depth buffer
140 * \param calculated Index of register containing per-fragment depth values
141 *
142 * \return
143 * If the calculated depth comparison mask is the actual mask, \c FALSE is
144 * returned. If the calculated depth comparison mask is the compliment of
145 * the actual mask, \c TRUE is returned.
146 *
147 * \note Emits a maximum of 3 instructions.
148 */
149 static boolean
150 emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa,
151 struct spe_function *f, int mask, int stored, int calculated)
152 {
153 unsigned func = (dsa->depth.enabled)
154 ? dsa->depth.func : PIPE_FUNC_ALWAYS;
155 int tmp = spe_allocate_available_register(f);
156 boolean compliment = FALSE;
157
158 switch (func) {
159 case PIPE_FUNC_NEVER:
160 spe_il(f, mask, 0);
161 break;
162
163 case PIPE_FUNC_NOTEQUAL:
164 compliment = TRUE;
165 /* FALLTHROUGH */
166 case PIPE_FUNC_EQUAL:
167 spe_ceq(f, mask, calculated, stored);
168 break;
169
170 case PIPE_FUNC_LEQUAL:
171 compliment = TRUE;
172 /* FALLTHROUGH */
173 case PIPE_FUNC_GREATER:
174 spe_clgt(f, mask, calculated, stored);
175 break;
176
177 case PIPE_FUNC_LESS:
178 compliment = TRUE;
179 /* FALLTHROUGH */
180 case PIPE_FUNC_GEQUAL:
181 spe_clgt(f, mask, calculated, stored);
182 spe_ceq(f, tmp, calculated, stored);
183 spe_or(f, mask, mask, tmp);
184 break;
185
186 case PIPE_FUNC_ALWAYS:
187 spe_il(f, mask, ~0);
188 break;
189
190 default:
191 assert(0);
192 break;
193 }
194
195 spe_release_register(f, tmp);
196 return compliment;
197 }
198
199
200 /**
201 * Generate code to apply the stencil operation (after testing).
202 * \note Emits a maximum of 5 instructions.
203 *
204 * \warning
205 * Since \c out and \c in might be the same register, this routine cannot
206 * generate code that uses \c out as a temporary.
207 */
208 static void
209 emit_stencil_op(struct spe_function *f,
210 int out, int in, int mask, unsigned op, unsigned ref)
211 {
212 const int clamp = spe_allocate_available_register(f);
213 const int clamp_mask = spe_allocate_available_register(f);
214 const int result = spe_allocate_available_register(f);
215
216 switch(op) {
217 case PIPE_STENCIL_OP_KEEP:
218 assert(0);
219 case PIPE_STENCIL_OP_ZERO:
220 spe_il(f, result, 0);
221 break;
222 case PIPE_STENCIL_OP_REPLACE:
223 spe_il(f, result, ref);
224 break;
225 case PIPE_STENCIL_OP_INCR:
226 /* clamp = [0xff, 0xff, 0xff, 0xff] */
227 spe_il(f, clamp, 0x0ff);
228 /* result[i] = in[i] + 1 */
229 spe_ai(f, result, in, 1);
230 /* clamp_mask[i] = (result[i] > 0xff) */
231 spe_clgti(f, clamp_mask, result, 0x0ff);
232 /* result[i] = clamp_mask[i] ? clamp[i] : result[i] */
233 spe_selb(f, result, result, clamp, clamp_mask);
234 break;
235 case PIPE_STENCIL_OP_DECR:
236 spe_il(f, clamp, 0);
237 spe_ai(f, result, in, -1);
238
239 /* If "(s-1) < 0" in signed arithemtic, then "(s-1) > MAX" in unsigned
240 * arithmetic.
241 */
242 spe_clgti(f, clamp_mask, result, 0x0ff);
243 spe_selb(f, result, result, clamp, clamp_mask);
244 break;
245 case PIPE_STENCIL_OP_INCR_WRAP:
246 spe_ai(f, result, in, 1);
247 break;
248 case PIPE_STENCIL_OP_DECR_WRAP:
249 spe_ai(f, result, in, -1);
250 break;
251 case PIPE_STENCIL_OP_INVERT:
252 spe_nor(f, result, in, in);
253 break;
254 default:
255 assert(0);
256 }
257
258 spe_selb(f, out, in, result, mask);
259
260 spe_release_register(f, result);
261 spe_release_register(f, clamp_mask);
262 spe_release_register(f, clamp);
263 }
264
265
266 /**
267 * Generate code to do stencil test. Four pixels are tested at once.
268 * \param dsa Depth / stencil test state
269 * \param face 0 for front face, 1 for back face
270 * \param f Function to append instructions to
271 * \param mask Register containing mask of fragments passing the
272 * alpha test
273 * \param depth_mask Register containing mask of fragments passing the
274 * depth test
275 * \param depth_compliment Is \c depth_mask the compliment of the actual mask?
276 * \param stencil Register containing values from stencil buffer
277 * \param depth_pass Register to store mask of fragments passing stencil test
278 * and depth test
279 *
280 * \note
281 * Emits a maximum of 10 + (3 * 5) = 25 instructions.
282 */
283 static int
284 emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa,
285 unsigned face,
286 struct spe_function *f,
287 int mask,
288 int depth_mask,
289 boolean depth_complement,
290 int stencil,
291 int depth_pass)
292 {
293 int stencil_fail = spe_allocate_available_register(f);
294 int depth_fail = spe_allocate_available_register(f);
295 int stencil_mask = spe_allocate_available_register(f);
296 int stencil_pass = spe_allocate_available_register(f);
297 int face_stencil = spe_allocate_available_register(f);
298 int stencil_src = stencil;
299 const unsigned ref = (dsa->stencil[face].ref_value
300 & dsa->stencil[face].valuemask);
301 boolean complement = FALSE;
302 int stored;
303 int tmp = spe_allocate_available_register(f);
304
305
306 if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
307 && (dsa->stencil[face].func != PIPE_FUNC_ALWAYS)
308 && (dsa->stencil[face].valuemask != 0x0ff)) {
309 stored = spe_allocate_available_register(f);
310 spe_andi(f, stored, stencil, dsa->stencil[face].valuemask);
311 } else {
312 stored = stencil;
313 }
314
315
316 switch (dsa->stencil[face].func) {
317 case PIPE_FUNC_NEVER:
318 spe_il(f, stencil_mask, 0); /* stencil_mask[0..3] = [0,0,0,0] */
319 break;
320
321 case PIPE_FUNC_NOTEQUAL:
322 complement = TRUE;
323 /* FALLTHROUGH */
324 case PIPE_FUNC_EQUAL:
325 /* stencil_mask[i] = (stored[i] == ref) */
326 spe_ceqi(f, stencil_mask, stored, ref);
327 break;
328
329 case PIPE_FUNC_LEQUAL:
330 complement = TRUE;
331 /* FALLTHROUGH */
332 case PIPE_FUNC_GREATER:
333 complement = TRUE;
334 /* stencil_mask[i] = (stored[i] > ref) */
335 spe_clgti(f, stencil_mask, stored, ref);
336 break;
337
338 case PIPE_FUNC_LESS:
339 complement = TRUE;
340 /* FALLTHROUGH */
341 case PIPE_FUNC_GEQUAL:
342 /* stencil_mask[i] = (stored[i] > ref) */
343 spe_clgti(f, stencil_mask, stored, ref);
344 /* tmp[i] = (stored[i] == ref) */
345 spe_ceqi(f, tmp, stored, ref);
346 /* stencil_mask[i] = stencil_mask[i] | tmp[i] */
347 spe_or(f, stencil_mask, stencil_mask, tmp);
348 break;
349
350 case PIPE_FUNC_ALWAYS:
351 /* See comment below. */
352 break;
353
354 default:
355 assert(0);
356 break;
357 }
358
359 if (stored != stencil) {
360 spe_release_register(f, stored);
361 }
362 spe_release_register(f, tmp);
363
364
365 /* ALWAYS is a very common stencil-test, so some effort is applied to
366 * optimize that case. The stencil-pass mask is the same as the input
367 * fragment mask. This makes the stencil-test (above) a no-op, and the
368 * input fragment mask can be "renamed" the stencil-pass mask.
369 */
370 if (dsa->stencil[face].func == PIPE_FUNC_ALWAYS) {
371 spe_release_register(f, stencil_pass);
372 stencil_pass = mask;
373 } else {
374 if (complement) {
375 spe_andc(f, stencil_pass, mask, stencil_mask);
376 } else {
377 spe_and(f, stencil_pass, mask, stencil_mask);
378 }
379 }
380
381 if (depth_complement) {
382 spe_andc(f, depth_pass, stencil_pass, depth_mask);
383 } else {
384 spe_and(f, depth_pass, stencil_pass, depth_mask);
385 }
386
387
388 /* Conditionally emit code to update the stencil value under various
389 * condititons. Note that there is no need to generate code under the
390 * following circumstances:
391 *
392 * - Stencil write mask is zero.
393 * - For stencil-fail if the stencil test is ALWAYS
394 * - For depth-fail if the stencil test is NEVER
395 * - For depth-pass if the stencil test is NEVER
396 * - Any of the 3 conditions if the operation is KEEP
397 */
398 if (dsa->stencil[face].writemask != 0) {
399 if ((dsa->stencil[face].func != PIPE_FUNC_ALWAYS)
400 && (dsa->stencil[face].fail_op != PIPE_STENCIL_OP_KEEP)) {
401 if (complement) {
402 spe_and(f, stencil_fail, mask, stencil_mask);
403 } else {
404 spe_andc(f, stencil_fail, mask, stencil_mask);
405 }
406
407 emit_stencil_op(f, face_stencil, stencil_src, stencil_fail,
408 dsa->stencil[face].fail_op,
409 dsa->stencil[face].ref_value);
410
411 stencil_src = face_stencil;
412 }
413
414 if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
415 && (dsa->stencil[face].zfail_op != PIPE_STENCIL_OP_KEEP)) {
416 if (depth_complement) {
417 spe_and(f, depth_fail, stencil_pass, depth_mask);
418 } else {
419 spe_andc(f, depth_fail, stencil_pass, depth_mask);
420 }
421
422 emit_stencil_op(f, face_stencil, stencil_src, depth_fail,
423 dsa->stencil[face].zfail_op,
424 dsa->stencil[face].ref_value);
425 stencil_src = face_stencil;
426 }
427
428 if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
429 && (dsa->stencil[face].zpass_op != PIPE_STENCIL_OP_KEEP)) {
430 emit_stencil_op(f, face_stencil, stencil_src, depth_pass,
431 dsa->stencil[face].zpass_op,
432 dsa->stencil[face].ref_value);
433 stencil_src = face_stencil;
434 }
435 }
436
437 spe_release_register(f, stencil_fail);
438 spe_release_register(f, depth_fail);
439 spe_release_register(f, stencil_mask);
440 if (stencil_pass != mask) {
441 spe_release_register(f, stencil_pass);
442 }
443
444 /* If all of the stencil operations were KEEP or the stencil write mask was
445 * zero, "stencil_src" will still be set to "stencil". In this case
446 * release the "face_stencil" register. Otherwise apply the stencil write
447 * mask to select bits from the calculated stencil value and the previous
448 * stencil value.
449 */
450 if (stencil_src == stencil) {
451 spe_release_register(f, face_stencil);
452 } else if (dsa->stencil[face].writemask != 0x0ff) {
453 int tmp = spe_allocate_available_register(f);
454
455 spe_il(f, tmp, dsa->stencil[face].writemask);
456 spe_selb(f, stencil_src, stencil, stencil_src, tmp);
457
458 spe_release_register(f, tmp);
459 }
460
461 return stencil_src;
462 }
463
464
465 void
466 cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa)
467 {
468 struct pipe_depth_stencil_alpha_state *const dsa = &cdsa->base;
469 struct spe_function *const f = &cdsa->code;
470
471 /* This code generates a maximum of 6 (alpha test) + 3 (depth test)
472 * + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions. Round
473 * up to 64 to make it a happy power-of-two.
474 */
475 spe_init_func(f, SPE_INST_SIZE * 64);
476
477
478 /* Allocate registers for the function's input parameters. Cleverly (and
479 * clever code is usually dangerous, but I couldn't resist) the generated
480 * function returns a structure. Returned structures start with register
481 * 3, and the structure fields are ordered to match up exactly with the
482 * input parameters.
483 */
484 int mask = spe_allocate_register(f, 3);
485 int depth = spe_allocate_register(f, 4);
486 int stencil = spe_allocate_register(f, 5);
487 int zvals = spe_allocate_register(f, 6);
488 int frag_a = spe_allocate_register(f, 7);
489 int facing = spe_allocate_register(f, 8);
490
491 int depth_mask = spe_allocate_available_register(f);
492
493 boolean depth_complement;
494
495
496 emit_alpha_test(dsa, f, mask, frag_a);
497
498 depth_complement = emit_depth_test(dsa, f, depth_mask, depth, zvals);
499
500 if (dsa->stencil[0].enabled) {
501 const int front_depth_pass = spe_allocate_available_register(f);
502 int front_stencil = emit_stencil_test(dsa, 0, f, mask,
503 depth_mask, depth_complement,
504 stencil, front_depth_pass);
505
506 if (dsa->stencil[1].enabled) {
507 const int back_depth_pass = spe_allocate_available_register(f);
508 int back_stencil = emit_stencil_test(dsa, 1, f, mask,
509 depth_mask, depth_complement,
510 stencil, back_depth_pass);
511
512 /* If the front facing stencil value and the back facing stencil
513 * value are stored in the same register, there is no need to select
514 * a value based on the facing. This can happen if the stencil value
515 * was not modified due to the write masks being zero, the stencil
516 * operations being KEEP, etc.
517 */
518 if (front_stencil != back_stencil) {
519 spe_selb(f, stencil, back_stencil, front_stencil, facing);
520 }
521
522 if (back_stencil != stencil) {
523 spe_release_register(f, back_stencil);
524 }
525
526 if (front_stencil != stencil) {
527 spe_release_register(f, front_stencil);
528 }
529
530 spe_selb(f, mask, back_depth_pass, front_depth_pass, facing);
531
532 spe_release_register(f, back_depth_pass);
533 } else {
534 if (front_stencil != stencil) {
535 spe_or(f, stencil, front_stencil, front_stencil);
536 spe_release_register(f, front_stencil);
537 }
538 spe_or(f, mask, front_depth_pass, front_depth_pass);
539 }
540
541 spe_release_register(f, front_depth_pass);
542 } else if (dsa->depth.enabled) {
543 if (depth_complement) {
544 spe_andc(f, mask, mask, depth_mask);
545 } else {
546 spe_and(f, mask, mask, depth_mask);
547 }
548 }
549
550 if (dsa->depth.writemask) {
551 spe_selb(f, depth, depth, zvals, mask);
552 }
553
554 spe_bi(f, 0, 0, 0); /* return from function call */
555
556
557 #if 0
558 {
559 const uint32_t *p = f->store;
560 unsigned i;
561
562 printf("# alpha (%sabled)\n",
563 (dsa->alpha.enabled) ? "en" : "dis");
564 printf("# func: %u\n", dsa->alpha.func);
565 printf("# ref: %.2f\n", dsa->alpha.ref);
566
567 printf("# depth (%sabled)\n",
568 (dsa->depth.enabled) ? "en" : "dis");
569 printf("# func: %u\n", dsa->depth.func);
570
571 for (i = 0; i < 2; i++) {
572 printf("# %s stencil (%sabled)\n",
573 (i == 0) ? "front" : "back",
574 (dsa->stencil[i].enabled) ? "en" : "dis");
575
576 printf("# func: %u\n", dsa->stencil[i].func);
577 printf("# op (sf, zf, zp): %u %u %u\n",
578 dsa->stencil[i].fail_op,
579 dsa->stencil[i].zfail_op,
580 dsa->stencil[i].zpass_op);
581 printf("# ref value / value mask / write mask: %02x %02x %02x\n",
582 dsa->stencil[i].ref_value,
583 dsa->stencil[i].valuemask,
584 dsa->stencil[i].writemask);
585 }
586
587 printf("\t.text\n");
588 for (/* empty */; p < f->csr; p++) {
589 printf("\t.long\t0x%04x\n", *p);
590 }
591 fflush(stdout);
592 }
593 #endif
594 }
595
596
597 /**
598 * \note Emits a maximum of 3 instructions
599 */
600 static int
601 emit_alpha_factor_calculation(struct spe_function *f,
602 unsigned factor,
603 int src_alpha, int dst_alpha, int const_alpha)
604 {
605 int factor_reg;
606 int tmp;
607
608
609 switch (factor) {
610 case PIPE_BLENDFACTOR_ONE:
611 factor_reg = -1;
612 break;
613
614 case PIPE_BLENDFACTOR_SRC_ALPHA:
615 factor_reg = spe_allocate_available_register(f);
616
617 spe_or(f, factor_reg, src_alpha, src_alpha);
618 break;
619
620 case PIPE_BLENDFACTOR_DST_ALPHA:
621 factor_reg = dst_alpha;
622 break;
623
624 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
625 factor_reg = -1;
626 break;
627
628 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
629 factor_reg = spe_allocate_available_register(f);
630
631 tmp = spe_allocate_available_register(f);
632 spe_il(f, tmp, 1);
633 spe_cuflt(f, tmp, tmp, 0);
634 spe_fs(f, factor_reg, tmp, const_alpha);
635 spe_release_register(f, tmp);
636 break;
637
638 case PIPE_BLENDFACTOR_CONST_ALPHA:
639 factor_reg = const_alpha;
640 break;
641
642 case PIPE_BLENDFACTOR_ZERO:
643 factor_reg = -1;
644 break;
645
646 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
647 tmp = spe_allocate_available_register(f);
648 factor_reg = spe_allocate_available_register(f);
649
650 spe_il(f, tmp, 1);
651 spe_cuflt(f, tmp, tmp, 0);
652 spe_fs(f, factor_reg, tmp, src_alpha);
653
654 spe_release_register(f, tmp);
655 break;
656
657 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
658 tmp = spe_allocate_available_register(f);
659 factor_reg = spe_allocate_available_register(f);
660
661 spe_il(f, tmp, 1);
662 spe_cuflt(f, tmp, tmp, 0);
663 spe_fs(f, factor_reg, tmp, dst_alpha);
664
665 spe_release_register(f, tmp);
666 break;
667
668 case PIPE_BLENDFACTOR_SRC1_ALPHA:
669 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
670 default:
671 assert(0);
672 factor_reg = -1;
673 break;
674 }
675
676 return factor_reg;
677 }
678
679
680 /**
681 * \note Emits a maximum of 6 instructions
682 */
683 static void
684 emit_color_factor_calculation(struct spe_function *f,
685 unsigned sF, unsigned mask,
686 const int *src,
687 const int *dst,
688 const int *const_color,
689 int *factor)
690 {
691 int tmp;
692 unsigned i;
693
694
695 factor[0] = -1;
696 factor[1] = -1;
697 factor[2] = -1;
698 factor[3] = -1;
699
700 switch (sF) {
701 case PIPE_BLENDFACTOR_ONE:
702 break;
703
704 case PIPE_BLENDFACTOR_SRC_COLOR:
705 for (i = 0; i < 3; ++i) {
706 if ((mask & (1U << i)) != 0) {
707 factor[i] = spe_allocate_available_register(f);
708 spe_or(f, factor[i], src[i], src[i]);
709 }
710 }
711 break;
712
713 case PIPE_BLENDFACTOR_SRC_ALPHA:
714 factor[0] = spe_allocate_available_register(f);
715 factor[1] = factor[0];
716 factor[2] = factor[0];
717
718 spe_or(f, factor[0], src[3], src[3]);
719 break;
720
721 case PIPE_BLENDFACTOR_DST_ALPHA:
722 factor[0] = dst[3];
723 factor[1] = dst[3];
724 factor[2] = dst[3];
725 break;
726
727 case PIPE_BLENDFACTOR_DST_COLOR:
728 factor[0] = dst[0];
729 factor[1] = dst[1];
730 factor[2] = dst[2];
731 break;
732
733 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
734 tmp = spe_allocate_available_register(f);
735 factor[0] = spe_allocate_available_register(f);
736 factor[1] = factor[0];
737 factor[2] = factor[0];
738
739 /* Alpha saturate means min(As, 1-Ad).
740 */
741 spe_il(f, tmp, 1);
742 spe_cuflt(f, tmp, tmp, 0);
743 spe_fs(f, tmp, tmp, dst[3]);
744 spe_fcgt(f, factor[0], tmp, src[3]);
745 spe_selb(f, factor[0], src[3], tmp, factor[0]);
746
747 spe_release_register(f, tmp);
748 break;
749
750 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
751 tmp = spe_allocate_available_register(f);
752 spe_il(f, tmp, 1);
753 spe_cuflt(f, tmp, tmp, 0);
754
755 for (i = 0; i < 3; i++) {
756 factor[i] = spe_allocate_available_register(f);
757
758 spe_fs(f, factor[i], tmp, const_color[i]);
759 }
760 spe_release_register(f, tmp);
761 break;
762
763 case PIPE_BLENDFACTOR_CONST_COLOR:
764 for (i = 0; i < 3; i++) {
765 factor[i] = const_color[i];
766 }
767 break;
768
769 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
770 factor[0] = spe_allocate_available_register(f);
771 factor[1] = factor[0];
772 factor[2] = factor[0];
773
774 tmp = spe_allocate_available_register(f);
775 spe_il(f, tmp, 1);
776 spe_cuflt(f, tmp, tmp, 0);
777 spe_fs(f, factor[0], tmp, const_color[3]);
778 spe_release_register(f, tmp);
779 break;
780
781 case PIPE_BLENDFACTOR_CONST_ALPHA:
782 factor[0] = const_color[3];
783 factor[1] = factor[0];
784 factor[2] = factor[0];
785 break;
786
787 case PIPE_BLENDFACTOR_ZERO:
788 break;
789
790 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
791 tmp = spe_allocate_available_register(f);
792
793 spe_il(f, tmp, 1);
794 spe_cuflt(f, tmp, tmp, 0);
795
796 for (i = 0; i < 3; ++i) {
797 if ((mask & (1U << i)) != 0) {
798 factor[i] = spe_allocate_available_register(f);
799 spe_fs(f, factor[i], tmp, src[i]);
800 }
801 }
802
803 spe_release_register(f, tmp);
804 break;
805
806 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
807 tmp = spe_allocate_available_register(f);
808 factor[0] = spe_allocate_available_register(f);
809 factor[1] = factor[0];
810 factor[2] = factor[0];
811
812 spe_il(f, tmp, 1);
813 spe_cuflt(f, tmp, tmp, 0);
814 spe_fs(f, factor[0], tmp, src[3]);
815
816 spe_release_register(f, tmp);
817 break;
818
819 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
820 tmp = spe_allocate_available_register(f);
821 factor[0] = spe_allocate_available_register(f);
822 factor[1] = factor[0];
823 factor[2] = factor[0];
824
825 spe_il(f, tmp, 1);
826 spe_cuflt(f, tmp, tmp, 0);
827 spe_fs(f, factor[0], tmp, dst[3]);
828
829 spe_release_register(f, tmp);
830 break;
831
832 case PIPE_BLENDFACTOR_INV_DST_COLOR:
833 tmp = spe_allocate_available_register(f);
834
835 spe_il(f, tmp, 1);
836 spe_cuflt(f, tmp, tmp, 0);
837
838 for (i = 0; i < 3; ++i) {
839 if ((mask & (1U << i)) != 0) {
840 factor[i] = spe_allocate_available_register(f);
841 spe_fs(f, factor[i], tmp, dst[i]);
842 }
843 }
844
845 spe_release_register(f, tmp);
846 break;
847
848 case PIPE_BLENDFACTOR_SRC1_COLOR:
849 case PIPE_BLENDFACTOR_SRC1_ALPHA:
850 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
851 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
852 default:
853 assert(0);
854 }
855 }
856
857
858 static void
859 emit_blend_calculation(struct spe_function *f,
860 unsigned func, unsigned sF, unsigned dF,
861 int src, int src_factor, int dst, int dst_factor)
862 {
863 int tmp = spe_allocate_available_register(f);
864
865 switch (func) {
866 case PIPE_BLEND_ADD:
867 if (sF == PIPE_BLENDFACTOR_ONE) {
868 if (dF == PIPE_BLENDFACTOR_ZERO) {
869 /* Do nothing. */
870 } else if (dF == PIPE_BLENDFACTOR_ONE) {
871 spe_fa(f, src, src, dst);
872 }
873 } else if (sF == PIPE_BLENDFACTOR_ZERO) {
874 if (dF == PIPE_BLENDFACTOR_ZERO) {
875 spe_il(f, src, 0);
876 } else if (dF == PIPE_BLENDFACTOR_ONE) {
877 spe_or(f, src, dst, dst);
878 } else {
879 spe_fm(f, src, dst, dst_factor);
880 }
881 } else if (dF == PIPE_BLENDFACTOR_ZERO) {
882 spe_fm(f, src, src, src_factor);
883 } else {
884 spe_fm(f, tmp, dst, dst_factor);
885 spe_fma(f, src, src, src_factor, tmp);
886 }
887 break;
888
889 case PIPE_BLEND_SUBTRACT:
890 if (sF == PIPE_BLENDFACTOR_ONE) {
891 if (dF == PIPE_BLENDFACTOR_ZERO) {
892 /* Do nothing. */
893 } else if (dF == PIPE_BLENDFACTOR_ONE) {
894 spe_fs(f, src, src, dst);
895 }
896 } else if (sF == PIPE_BLENDFACTOR_ZERO) {
897 if (dF == PIPE_BLENDFACTOR_ZERO) {
898 spe_il(f, src, 0);
899 } else if (dF == PIPE_BLENDFACTOR_ONE) {
900 spe_il(f, tmp, 0);
901 spe_fs(f, src, tmp, dst);
902 } else {
903 spe_fm(f, src, dst, dst_factor);
904 }
905 } else if (dF == PIPE_BLENDFACTOR_ZERO) {
906 spe_fm(f, src, src, src_factor);
907 } else {
908 spe_fm(f, tmp, dst, dst_factor);
909 spe_fms(f, src, src, src_factor, tmp);
910 }
911 break;
912
913 case PIPE_BLEND_REVERSE_SUBTRACT:
914 if (sF == PIPE_BLENDFACTOR_ONE) {
915 if (dF == PIPE_BLENDFACTOR_ZERO) {
916 spe_il(f, tmp, 0);
917 spe_fs(f, src, tmp, src);
918 } else if (dF == PIPE_BLENDFACTOR_ONE) {
919 spe_fs(f, src, dst, src);
920 }
921 } else if (sF == PIPE_BLENDFACTOR_ZERO) {
922 if (dF == PIPE_BLENDFACTOR_ZERO) {
923 spe_il(f, src, 0);
924 } else if (dF == PIPE_BLENDFACTOR_ONE) {
925 spe_or(f, src, dst, dst);
926 } else {
927 spe_fm(f, src, dst, dst_factor);
928 }
929 } else if (dF == PIPE_BLENDFACTOR_ZERO) {
930 spe_fm(f, src, src, src_factor);
931 } else {
932 spe_fm(f, tmp, src, src_factor);
933 spe_fms(f, src, src, dst_factor, tmp);
934 }
935 break;
936
937 case PIPE_BLEND_MIN:
938 spe_cgt(f, tmp, src, dst);
939 spe_selb(f, src, src, dst, tmp);
940 break;
941
942 case PIPE_BLEND_MAX:
943 spe_cgt(f, tmp, src, dst);
944 spe_selb(f, src, dst, src, tmp);
945 break;
946
947 default:
948 assert(0);
949 }
950
951 spe_release_register(f, tmp);
952 }
953
954
955 /**
956 * Generate code to perform alpha blending on the SPE
957 */
958 void
959 cell_generate_alpha_blend(struct cell_blend_state *cb)
960 {
961 struct pipe_blend_state *const b = &cb->base;
962 struct spe_function *const f = &cb->code;
963
964 /* This code generates a maximum of 3 (source alpha factor)
965 * + 3 (destination alpha factor) + (3 * 6) (source color factor)
966 * + (3 * 6) (destination color factor) + (4 * 2) (blend equation)
967 * + 4 (fragment mask) + 1 (return) = 55 instlructions. Round up to 64 to
968 * make it a happy power-of-two.
969 */
970 spe_init_func(f, SPE_INST_SIZE * 64);
971
972
973 const int frag[4] = {
974 spe_allocate_register(f, 3),
975 spe_allocate_register(f, 4),
976 spe_allocate_register(f, 5),
977 spe_allocate_register(f, 6),
978 };
979 const int pixel[4] = {
980 spe_allocate_register(f, 7),
981 spe_allocate_register(f, 8),
982 spe_allocate_register(f, 9),
983 spe_allocate_register(f, 10),
984 };
985 const int const_color[4] = {
986 spe_allocate_register(f, 11),
987 spe_allocate_register(f, 12),
988 spe_allocate_register(f, 13),
989 spe_allocate_register(f, 14),
990 };
991 unsigned func[4];
992 unsigned sF[4];
993 unsigned dF[4];
994 unsigned i;
995 int src_factor[4];
996 int dst_factor[4];
997
998
999 /* Does the selected blend mode make use of the source / destination
1000 * color (RGB) blend factors?
1001 */
1002 boolean need_color_factor = b->blend_enable
1003 && (b->rgb_func != PIPE_BLEND_MIN)
1004 && (b->rgb_func != PIPE_BLEND_MAX);
1005
1006 /* Does the selected blend mode make use of the source / destination
1007 * alpha blend factors?
1008 */
1009 boolean need_alpha_factor = b->blend_enable
1010 && (b->alpha_func != PIPE_BLEND_MIN)
1011 && (b->alpha_func != PIPE_BLEND_MAX);
1012
1013
1014 if (b->blend_enable) {
1015 sF[0] = b->rgb_src_factor;
1016 sF[1] = sF[0];
1017 sF[2] = sF[0];
1018 switch (b->alpha_src_factor & 0x0f) {
1019 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
1020 sF[3] = PIPE_BLENDFACTOR_ONE;
1021 break;
1022 case PIPE_BLENDFACTOR_SRC_COLOR:
1023 case PIPE_BLENDFACTOR_DST_COLOR:
1024 case PIPE_BLENDFACTOR_CONST_COLOR:
1025 case PIPE_BLENDFACTOR_SRC1_COLOR:
1026 sF[3] = b->alpha_src_factor + 1;
1027 break;
1028 default:
1029 sF[3] = b->alpha_src_factor;
1030 }
1031
1032 dF[0] = b->rgb_dst_factor;
1033 dF[1] = dF[0];
1034 dF[2] = dF[0];
1035 switch (b->alpha_dst_factor & 0x0f) {
1036 case PIPE_BLENDFACTOR_SRC_COLOR:
1037 case PIPE_BLENDFACTOR_DST_COLOR:
1038 case PIPE_BLENDFACTOR_CONST_COLOR:
1039 case PIPE_BLENDFACTOR_SRC1_COLOR:
1040 dF[3] = b->alpha_dst_factor + 1;
1041 break;
1042 default:
1043 dF[3] = b->alpha_dst_factor;
1044 }
1045
1046 func[0] = b->rgb_func;
1047 func[1] = func[0];
1048 func[2] = func[0];
1049 func[3] = b->alpha_func;
1050 } else {
1051 sF[0] = PIPE_BLENDFACTOR_ONE;
1052 sF[1] = PIPE_BLENDFACTOR_ONE;
1053 sF[2] = PIPE_BLENDFACTOR_ONE;
1054 sF[3] = PIPE_BLENDFACTOR_ONE;
1055 dF[0] = PIPE_BLENDFACTOR_ZERO;
1056 dF[1] = PIPE_BLENDFACTOR_ZERO;
1057 dF[2] = PIPE_BLENDFACTOR_ZERO;
1058 dF[3] = PIPE_BLENDFACTOR_ZERO;
1059
1060 func[0] = PIPE_BLEND_ADD;
1061 func[1] = PIPE_BLEND_ADD;
1062 func[2] = PIPE_BLEND_ADD;
1063 func[3] = PIPE_BLEND_ADD;
1064 }
1065
1066
1067 /* If alpha writing is enabled and the alpha blend mode requires use of
1068 * the alpha factor, calculate the alpha factor.
1069 */
1070 if (((b->colormask & 8) != 0) && need_alpha_factor) {
1071 src_factor[3] = emit_alpha_factor_calculation(f, sF[3], const_color[3],
1072 frag[3], pixel[3]);
1073
1074 /* If the alpha destination blend factor is the same as the alpha source
1075 * blend factor, re-use the previously calculated value.
1076 */
1077 dst_factor[3] = (dF[3] == sF[3])
1078 ? src_factor[3]
1079 : emit_alpha_factor_calculation(f, dF[3], const_color[3],
1080 frag[3], pixel[3]);
1081 }
1082
1083
1084 if (sF[0] == sF[3]) {
1085 src_factor[0] = src_factor[3];
1086 src_factor[1] = src_factor[3];
1087 src_factor[2] = src_factor[3];
1088 } else if (sF[0] == dF[3]) {
1089 src_factor[0] = dst_factor[3];
1090 src_factor[1] = dst_factor[3];
1091 src_factor[2] = dst_factor[3];
1092 } else if (need_color_factor) {
1093 emit_color_factor_calculation(f,
1094 b->rgb_src_factor,
1095 b->colormask,
1096 frag, pixel, const_color, src_factor);
1097 }
1098
1099
1100 if (dF[0] == sF[3]) {
1101 dst_factor[0] = src_factor[3];
1102 dst_factor[1] = src_factor[3];
1103 dst_factor[2] = src_factor[3];
1104 } else if (dF[0] == dF[3]) {
1105 dst_factor[0] = dst_factor[3];
1106 dst_factor[1] = dst_factor[3];
1107 dst_factor[2] = dst_factor[3];
1108 } else if (dF[0] == sF[0]) {
1109 dst_factor[0] = src_factor[0];
1110 dst_factor[1] = src_factor[1];
1111 dst_factor[2] = src_factor[2];
1112 } else if (need_color_factor) {
1113 emit_color_factor_calculation(f,
1114 b->rgb_dst_factor,
1115 b->colormask,
1116 frag, pixel, const_color, dst_factor);
1117 }
1118
1119
1120
1121 for (i = 0; i < 4; ++i) {
1122 if ((b->colormask & (1U << i)) != 0) {
1123 emit_blend_calculation(f,
1124 func[i], sF[i], dF[i],
1125 frag[i], src_factor[i],
1126 pixel[i], dst_factor[i]);
1127 }
1128 }
1129
1130 spe_bi(f, 0, 0, 0);
1131
1132 #if 0
1133 {
1134 const uint32_t *p = f->store;
1135
1136 printf("# %u instructions\n", f->csr - f->store);
1137 printf("# blend (%sabled)\n",
1138 (cb->base.blend_enable) ? "en" : "dis");
1139 printf("# RGB func / sf / df: %u %u %u\n",
1140 cb->base.rgb_func,
1141 cb->base.rgb_src_factor,
1142 cb->base.rgb_dst_factor);
1143 printf("# ALP func / sf / df: %u %u %u\n",
1144 cb->base.alpha_func,
1145 cb->base.alpha_src_factor,
1146 cb->base.alpha_dst_factor);
1147
1148 printf("\t.text\n");
1149 for (/* empty */; p < f->csr; p++) {
1150 printf("\t.long\t0x%04x\n", *p);
1151 }
1152 fflush(stdout);
1153 }
1154 #endif
1155 }
1156
1157
1158 static int
1159 PC_OFFSET(const struct spe_function *f, const void *d)
1160 {
1161 const intptr_t pc = (intptr_t) &f->store[f->num_inst];
1162 const intptr_t ea = ~0x0f & (intptr_t) d;
1163
1164 return (ea - pc) >> 2;
1165 }
1166
1167
1168 /**
1169 * Generate code to perform color conversion and logic op
1170 *
1171 * \bug
1172 * The code generated by this function should also perform dithering.
1173 *
1174 * \bug
1175 * The code generated by this function should also perform color-write
1176 * masking.
1177 *
1178 * \bug
1179 * Only two framebuffer formats are supported at this time.
1180 */
1181 void
1182 cell_generate_logic_op(struct spe_function *f,
1183 const struct pipe_blend_state *blend,
1184 struct pipe_surface *surf)
1185 {
1186 const unsigned logic_op = (blend->logicop_enable)
1187 ? blend->logicop_func : PIPE_LOGICOP_COPY;
1188
1189 /* This code generates a maximum of 37 instructions. An additional 32
1190 * bytes (equiv. to 8 instructions) are needed for data storage. Round up
1191 * to 64 to make it a happy power-of-two.
1192 */
1193 spe_init_func(f, SPE_INST_SIZE * 64);
1194
1195
1196 /* Pixel colors in framebuffer format in AoS layout.
1197 */
1198 const int pixel[4] = {
1199 spe_allocate_register(f, 3),
1200 spe_allocate_register(f, 4),
1201 spe_allocate_register(f, 5),
1202 spe_allocate_register(f, 6),
1203 };
1204
1205 /* Fragment colors stored as floats in SoA layout.
1206 */
1207 const int frag[4] = {
1208 spe_allocate_register(f, 7),
1209 spe_allocate_register(f, 8),
1210 spe_allocate_register(f, 9),
1211 spe_allocate_register(f, 10),
1212 };
1213
1214 const int mask = spe_allocate_register(f, 11);
1215
1216
1217 /* Short-circuit the noop and invert cases.
1218 */
1219 if ((logic_op == PIPE_LOGICOP_NOOP) || (blend->colormask == 0)) {
1220 spe_bi(f, 0, 0, 0);
1221 return;
1222 } else if (logic_op == PIPE_LOGICOP_INVERT) {
1223 spe_nor(f, pixel[0], pixel[0], pixel[0]);
1224 spe_nor(f, pixel[1], pixel[1], pixel[1]);
1225 spe_nor(f, pixel[2], pixel[2], pixel[2]);
1226 spe_nor(f, pixel[3], pixel[3], pixel[3]);
1227 spe_bi(f, 0, 0, 0);
1228 return;
1229 }
1230
1231
1232 const int tmp[4] = {
1233 spe_allocate_available_register(f),
1234 spe_allocate_available_register(f),
1235 spe_allocate_available_register(f),
1236 spe_allocate_available_register(f),
1237 };
1238
1239 const int shuf_xpose_hi = spe_allocate_available_register(f);
1240 const int shuf_xpose_lo = spe_allocate_available_register(f);
1241 const int shuf_color = spe_allocate_available_register(f);
1242
1243
1244 /* Pointer to the begining of the function's private data area.
1245 */
1246 uint32_t *const data = ((uint32_t *) f->store) + (64 - 8);
1247
1248
1249 /* Convert fragment colors to framebuffer format in AoS layout.
1250 */
1251 switch (surf->format) {
1252 case PIPE_FORMAT_A8R8G8B8_UNORM:
1253 data[0] = 0x00010203;
1254 data[1] = 0x10111213;
1255 data[2] = 0x04050607;
1256 data[3] = 0x14151617;
1257 data[4] = 0x0c000408;
1258 data[5] = 0x80808080;
1259 data[6] = 0x80808080;
1260 data[7] = 0x80808080;
1261 break;
1262 case PIPE_FORMAT_B8G8R8A8_UNORM:
1263 data[0] = 0x03020100;
1264 data[1] = 0x13121110;
1265 data[2] = 0x07060504;
1266 data[3] = 0x17161514;
1267 data[4] = 0x0804000c;
1268 data[5] = 0x80808080;
1269 data[6] = 0x80808080;
1270 data[7] = 0x80808080;
1271 break;
1272 default:
1273 fprintf(stderr, "CELL: Bad pixel format in cell_generate_logic_op()");
1274 ASSERT(0);
1275 }
1276
1277 spe_ilh(f, tmp[0], 0x0808);
1278 spe_lqr(f, shuf_xpose_hi, PC_OFFSET(f, data+0));
1279 spe_lqr(f, shuf_color, PC_OFFSET(f, data+4));
1280 spe_a(f, shuf_xpose_lo, shuf_xpose_hi, tmp[0]);
1281
1282 spe_shufb(f, tmp[0], frag[0], frag[2], shuf_xpose_hi);
1283 spe_shufb(f, tmp[1], frag[0], frag[2], shuf_xpose_lo);
1284 spe_shufb(f, tmp[2], frag[1], frag[3], shuf_xpose_hi);
1285 spe_shufb(f, tmp[3], frag[1], frag[3], shuf_xpose_lo);
1286
1287 spe_shufb(f, frag[0], tmp[0], tmp[2], shuf_xpose_hi);
1288 spe_shufb(f, frag[1], tmp[0], tmp[2], shuf_xpose_lo);
1289 spe_shufb(f, frag[2], tmp[1], tmp[3], shuf_xpose_hi);
1290 spe_shufb(f, frag[3], tmp[1], tmp[3], shuf_xpose_lo);
1291
1292 spe_cfltu(f, frag[0], frag[0], 32);
1293 spe_cfltu(f, frag[1], frag[1], 32);
1294 spe_cfltu(f, frag[2], frag[2], 32);
1295 spe_cfltu(f, frag[3], frag[3], 32);
1296
1297 spe_shufb(f, frag[0], frag[0], pixel[0], shuf_color);
1298 spe_shufb(f, frag[1], frag[1], pixel[1], shuf_color);
1299 spe_shufb(f, frag[2], frag[2], pixel[2], shuf_color);
1300 spe_shufb(f, frag[3], frag[3], pixel[3], shuf_color);
1301
1302
1303 /* If logic op is enabled, perform the requested logical operation on the
1304 * converted fragment colors and the pixel colors.
1305 */
1306 switch (logic_op) {
1307 case PIPE_LOGICOP_CLEAR:
1308 spe_il(f, frag[0], 0);
1309 spe_il(f, frag[1], 0);
1310 spe_il(f, frag[2], 0);
1311 spe_il(f, frag[3], 0);
1312 break;
1313 case PIPE_LOGICOP_NOR:
1314 spe_nor(f, frag[0], frag[0], pixel[0]);
1315 spe_nor(f, frag[1], frag[1], pixel[1]);
1316 spe_nor(f, frag[2], frag[2], pixel[2]);
1317 spe_nor(f, frag[3], frag[3], pixel[3]);
1318 break;
1319 case PIPE_LOGICOP_AND_INVERTED:
1320 spe_andc(f, frag[0], pixel[0], frag[0]);
1321 spe_andc(f, frag[1], pixel[1], frag[1]);
1322 spe_andc(f, frag[2], pixel[2], frag[2]);
1323 spe_andc(f, frag[3], pixel[3], frag[3]);
1324 break;
1325 case PIPE_LOGICOP_COPY_INVERTED:
1326 spe_nor(f, frag[0], frag[0], frag[0]);
1327 spe_nor(f, frag[1], frag[1], frag[1]);
1328 spe_nor(f, frag[2], frag[2], frag[2]);
1329 spe_nor(f, frag[3], frag[3], frag[3]);
1330 break;
1331 case PIPE_LOGICOP_AND_REVERSE:
1332 spe_andc(f, frag[0], frag[0], pixel[0]);
1333 spe_andc(f, frag[1], frag[1], pixel[1]);
1334 spe_andc(f, frag[2], frag[2], pixel[2]);
1335 spe_andc(f, frag[3], frag[3], pixel[3]);
1336 break;
1337 case PIPE_LOGICOP_XOR:
1338 spe_xor(f, frag[0], frag[0], pixel[0]);
1339 spe_xor(f, frag[1], frag[1], pixel[1]);
1340 spe_xor(f, frag[2], frag[2], pixel[2]);
1341 spe_xor(f, frag[3], frag[3], pixel[3]);
1342 break;
1343 case PIPE_LOGICOP_NAND:
1344 spe_nand(f, frag[0], frag[0], pixel[0]);
1345 spe_nand(f, frag[1], frag[1], pixel[1]);
1346 spe_nand(f, frag[2], frag[2], pixel[2]);
1347 spe_nand(f, frag[3], frag[3], pixel[3]);
1348 break;
1349 case PIPE_LOGICOP_AND:
1350 spe_and(f, frag[0], frag[0], pixel[0]);
1351 spe_and(f, frag[1], frag[1], pixel[1]);
1352 spe_and(f, frag[2], frag[2], pixel[2]);
1353 spe_and(f, frag[3], frag[3], pixel[3]);
1354 break;
1355 case PIPE_LOGICOP_EQUIV:
1356 spe_eqv(f, frag[0], frag[0], pixel[0]);
1357 spe_eqv(f, frag[1], frag[1], pixel[1]);
1358 spe_eqv(f, frag[2], frag[2], pixel[2]);
1359 spe_eqv(f, frag[3], frag[3], pixel[3]);
1360 break;
1361 case PIPE_LOGICOP_OR_INVERTED:
1362 spe_orc(f, frag[0], pixel[0], frag[0]);
1363 spe_orc(f, frag[1], pixel[1], frag[1]);
1364 spe_orc(f, frag[2], pixel[2], frag[2]);
1365 spe_orc(f, frag[3], pixel[3], frag[3]);
1366 break;
1367 case PIPE_LOGICOP_COPY:
1368 break;
1369 case PIPE_LOGICOP_OR_REVERSE:
1370 spe_orc(f, frag[0], frag[0], pixel[0]);
1371 spe_orc(f, frag[1], frag[1], pixel[1]);
1372 spe_orc(f, frag[2], frag[2], pixel[2]);
1373 spe_orc(f, frag[3], frag[3], pixel[3]);
1374 break;
1375 case PIPE_LOGICOP_OR:
1376 spe_or(f, frag[0], frag[0], pixel[0]);
1377 spe_or(f, frag[1], frag[1], pixel[1]);
1378 spe_or(f, frag[2], frag[2], pixel[2]);
1379 spe_or(f, frag[3], frag[3], pixel[3]);
1380 break;
1381 case PIPE_LOGICOP_SET:
1382 spe_il(f, frag[0], ~0);
1383 spe_il(f, frag[1], ~0);
1384 spe_il(f, frag[2], ~0);
1385 spe_il(f, frag[3], ~0);
1386 break;
1387
1388 /* These two cases are short-circuited above.
1389 */
1390 case PIPE_LOGICOP_INVERT:
1391 case PIPE_LOGICOP_NOOP:
1392 default:
1393 assert(0);
1394 }
1395
1396
1397 /* Apply fragment mask.
1398 */
1399 spe_ilh(f, tmp[0], 0x0000);
1400 spe_ilh(f, tmp[1], 0x0404);
1401 spe_ilh(f, tmp[2], 0x0808);
1402 spe_ilh(f, tmp[3], 0x0c0c);
1403
1404 spe_shufb(f, tmp[0], mask, mask, tmp[0]);
1405 spe_shufb(f, tmp[1], mask, mask, tmp[1]);
1406 spe_shufb(f, tmp[2], mask, mask, tmp[2]);
1407 spe_shufb(f, tmp[3], mask, mask, tmp[3]);
1408
1409 spe_selb(f, pixel[0], pixel[0], frag[0], tmp[0]);
1410 spe_selb(f, pixel[1], pixel[1], frag[1], tmp[1]);
1411 spe_selb(f, pixel[2], pixel[2], frag[2], tmp[2]);
1412 spe_selb(f, pixel[3], pixel[3], frag[3], tmp[3]);
1413
1414 spe_bi(f, 0, 0, 0);
1415
1416 #if 0
1417 {
1418 const uint32_t *p = f->store;
1419 unsigned i;
1420
1421 printf("# %u instructions\n", f->csr - f->store);
1422
1423 printf("\t.text\n");
1424 for (i = 0; i < 64; i++) {
1425 printf("\t.long\t0x%04x\n", p[i]);
1426 }
1427 fflush(stdout);
1428 }
1429 #endif
1430 }