cell: fix formatting
[mesa.git] / src / gallium / drivers / cell / ppu / cell_gen_fp.c
1 /**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29
30 /**
31 * Generate SPU fragment program/shader code.
32 *
33 * Note that we generate SOA-style code here. So each TGSI instruction
34 * operates on four pixels (and is translated into four SPU instructions,
35 * generally speaking).
36 *
37 * \author Brian Paul
38 */
39
40 #include <math.h>
41 #include "pipe/p_defines.h"
42 #include "pipe/p_state.h"
43 #include "pipe/p_shader_tokens.h"
44 #include "tgsi/tgsi_parse.h"
45 #include "tgsi/tgsi_util.h"
46 #include "tgsi/tgsi_exec.h"
47 #include "tgsi/tgsi_dump.h"
48 #include "rtasm/rtasm_ppc_spe.h"
49 #include "util/u_memory.h"
50 #include "cell_context.h"
51 #include "cell_gen_fp.h"
52
53
/* Row counts of the temp_regs[] / imm_regs[] tables in struct codegen */
#define MAX_TEMPS   16
#define MAX_IMMED   8

/* Channel (component) indexes of a four-component register */
#define CHAN_X   0
#define CHAN_Y   1
#define CHAN_Z   2
#define CHAN_W   3
61
62 /**
63 * Context needed during code generation.
64 */
65 struct codegen
66 {
67 struct cell_context *cell;
68 int inputs_reg; /**< 1st function parameter */
69 int outputs_reg; /**< 2nd function parameter */
70 int constants_reg; /**< 3rd function parameter */
71 int temp_regs[MAX_TEMPS][4]; /**< maps TGSI temps to SPE registers */
72 int imm_regs[MAX_IMMED][4]; /**< maps TGSI immediates to SPE registers */
73
74 int num_imm; /**< number of immediates */
75
76 int one_reg; /**< register containing {1.0, 1.0, 1.0, 1.0} */
77
78 /** Per-instruction temps / intermediate temps */
79 int num_itemps;
80 int itemps[10];
81
82 /** Current IF/ELSE/ENDIF nesting level */
83 int if_nesting;
84 /** Index of execution mask register */
85 int exec_mask_reg;
86
87 struct spe_function *f;
88 boolean error;
89 };
90
91
92 /**
93 * Allocate an intermediate temporary register.
94 */
95 static int
96 get_itemp(struct codegen *gen)
97 {
98 int t = spe_allocate_available_register(gen->f);
99 assert(gen->num_itemps < Elements(gen->itemps));
100 gen->itemps[gen->num_itemps++] = t;
101 return t;
102 }
103
104 /**
105 * Free all intermediate temporary registers. To be called after each
106 * instruction has been emitted.
107 */
108 static void
109 free_itemps(struct codegen *gen)
110 {
111 int i;
112 for (i = 0; i < gen->num_itemps; i++) {
113 spe_release_register(gen->f, gen->itemps[i]);
114 }
115 gen->num_itemps = 0;
116 }
117
118
119 /**
120 * Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}.
121 * The register is allocated and initialized upon the first call.
122 */
123 static int
124 get_const_one_reg(struct codegen *gen)
125 {
126 if (gen->one_reg <= 0) {
127 gen->one_reg = spe_allocate_available_register(gen->f);
128
129 spe_indent(gen->f, 4);
130 spe_comment(gen->f, -4, "INIT CONSTANT 1.0:");
131
132 /* one = {1.0, 1.0, 1.0, 1.0} */
133 spe_load_float(gen->f, gen->one_reg, 1.0f);
134
135 spe_indent(gen->f, -4);
136 }
137
138 return gen->one_reg;
139 }
140
141
142 /**
143 * Return index of the pixel execution mask.
144 * The register is allocated an initialized upon the first call.
145 *
146 * The pixel execution mask controls which pixels in a quad are
147 * modified, according to surrounding conditionals, loops, etc.
148 */
149 static int
150 get_exec_mask_reg(struct codegen *gen)
151 {
152 if (gen->exec_mask_reg <= 0) {
153 gen->exec_mask_reg = spe_allocate_available_register(gen->f);
154
155 spe_indent(gen->f, 4);
156 spe_comment(gen->f, -4, "INIT EXEC MASK = ~0:");
157
158 /* exec_mask = {~0, ~0, ~0, ~0} */
159 spe_load_int(gen->f, gen->exec_mask_reg, ~0);
160
161 spe_indent(gen->f, -4);
162 }
163
164 return gen->exec_mask_reg;
165 }
166
167
168 /**
169 * Return the index of the SPU temporary containing the named TGSI
170 * source register. If the TGSI register is a TGSI_FILE_TEMPORARY we
171 * just return the corresponding SPE register. If the TGIS register
172 * is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register
173 * and emit an SPE load instruction.
174 */
175 static int
176 get_src_reg(struct codegen *gen,
177 int channel,
178 const struct tgsi_full_src_register *src)
179 {
180 int reg = -1;
181 int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel);
182 boolean reg_is_itemp = FALSE;
183 uint sign_op;
184
185 assert(swizzle >= TGSI_SWIZZLE_X);
186 assert(swizzle <= TGSI_EXTSWIZZLE_ONE);
187
188 switch (src->SrcRegister.File) {
189 case TGSI_FILE_TEMPORARY:
190 reg = gen->temp_regs[src->SrcRegister.Index][swizzle];
191 break;
192 case TGSI_FILE_INPUT:
193 {
194 if (swizzle == TGSI_EXTSWIZZLE_ONE) {
195 /* Load const one float and early out */
196 reg = get_const_one_reg(gen);
197 }
198 else if (swizzle == TGSI_EXTSWIZZLE_ZERO) {
199 /* Load const zero float and early out */
200 reg = get_itemp(gen);
201 spe_xor(gen->f, reg, reg, reg);
202 }
203 else {
204 /* offset is measured in quadwords, not bytes */
205 int offset = src->SrcRegister.Index * 4 + swizzle;
206 reg = get_itemp(gen);
207 reg_is_itemp = TRUE;
208 /* Load: reg = memory[(machine_reg) + offset] */
209 spe_lqd(gen->f, reg, gen->inputs_reg, offset);
210 }
211 }
212 break;
213 case TGSI_FILE_IMMEDIATE:
214 reg = gen->imm_regs[src->SrcRegister.Index][swizzle];
215 break;
216 case TGSI_FILE_CONSTANT:
217 /* xxx fall-through for now / fix */
218 default:
219 assert(0);
220 }
221
222 /*
223 * Handle absolute value, negate or set-negative of src register.
224 */
225 sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel);
226 if (sign_op != TGSI_UTIL_SIGN_KEEP) {
227 /*
228 * All sign ops are done by manipulating bit 31, the IEEE float sign bit.
229 */
230 const int bit31mask_reg = get_itemp(gen);
231 int result_reg;
232
233 if (reg_is_itemp) {
234 /* re-use 'reg' for the result */
235 result_reg = reg;
236 }
237 else {
238 /* alloc a new reg for the result */
239 result_reg = get_itemp(gen);
240 }
241
242 /* mask with bit 31 set, the rest cleared */
243 spe_load_int(gen->f, bit31mask_reg, (1 << 31));
244
245 if (sign_op == TGSI_UTIL_SIGN_CLEAR) {
246 spe_andc(gen->f, result_reg, reg, bit31mask_reg);
247 }
248 else if (sign_op == TGSI_UTIL_SIGN_SET) {
249 spe_and(gen->f, result_reg, reg, bit31mask_reg);
250 }
251 else {
252 assert(sign_op == TGSI_UTIL_SIGN_TOGGLE);
253 spe_xor(gen->f, result_reg, reg, bit31mask_reg);
254 }
255
256 reg = result_reg;
257 }
258
259 return reg;
260 }
261
262
263 /**
264 * Return the index of an SPE register to use for the given TGSI register.
265 * If the TGSI register is TGSI_FILE_TEMPORARAY, the index of the
266 * corresponding SPE register is returned. If the TGSI register is
267 * TGSI_FILE_OUTPUT we allocate an intermediate temporary register.
268 * See store_dest_reg() below...
269 */
270 static int
271 get_dst_reg(struct codegen *gen,
272 int channel,
273 const struct tgsi_full_dst_register *dest)
274 {
275 int reg = -1;
276
277 switch (dest->DstRegister.File) {
278 case TGSI_FILE_TEMPORARY:
279 if (gen->if_nesting > 0)
280 reg = get_itemp(gen);
281 else
282 reg = gen->temp_regs[dest->DstRegister.Index][channel];
283 break;
284 case TGSI_FILE_OUTPUT:
285 reg = get_itemp(gen);
286 break;
287 default:
288 assert(0);
289 }
290
291 return reg;
292 }
293
294
295 /**
296 * When a TGSI instruction is writing to an output register, this
297 * function emits the SPE store instruction to store the value_reg.
298 * \param value_reg the SPE register containing the value to store.
299 * This would have been returned by get_dst_reg().
300 */
301 static void
302 store_dest_reg(struct codegen *gen,
303 int value_reg, int channel,
304 const struct tgsi_full_dst_register *dest)
305 {
306 switch (dest->DstRegister.File) {
307 case TGSI_FILE_TEMPORARY:
308 if (gen->if_nesting > 0) {
309 int d_reg = gen->temp_regs[dest->DstRegister.Index][channel];
310 int exec_reg = get_exec_mask_reg(gen);
311 /* Mix d with new value according to exec mask:
312 * d[i] = mask_reg[i] ? value_reg : d_reg
313 */
314 spe_selb(gen->f, d_reg, d_reg, value_reg, exec_reg);
315 }
316 else {
317 /* we're not inside a condition or loop: do nothing special */
318 }
319 break;
320 case TGSI_FILE_OUTPUT:
321 {
322 /* offset is measured in quadwords, not bytes */
323 int offset = dest->DstRegister.Index * 4 + channel;
324 if (gen->if_nesting > 0) {
325 int exec_reg = get_exec_mask_reg(gen);
326 int curval_reg = get_itemp(gen);
327 /* First read the current value from memory:
328 * Load: curval = memory[(machine_reg) + offset]
329 */
330 spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset);
331 /* Mix curval with newvalue according to exec mask:
332 * d[i] = mask_reg[i] ? value_reg : d_reg
333 */
334 spe_selb(gen->f, curval_reg, curval_reg, value_reg, exec_reg);
335 /* Store: memory[(machine_reg) + offset] = curval */
336 spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset);
337 }
338 else {
339 /* Store: memory[(machine_reg) + offset] = reg */
340 spe_stqd(gen->f, value_reg, gen->outputs_reg, offset);
341 }
342 }
343 break;
344 default:
345 assert(0);
346 }
347 }
348
349
350 static boolean
351 emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst)
352 {
353 int ch;
354 spe_comment(gen->f, -4, "MOV:");
355 for (ch = 0; ch < 4; ch++) {
356 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
357 int src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
358 int dst_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
359 /* XXX we don't always need to actually emit a mov instruction here */
360 spe_move(gen->f, dst_reg, src_reg);
361 store_dest_reg(gen, dst_reg, ch, &inst->FullDstRegisters[0]);
362 free_itemps(gen);
363 }
364 }
365 return true;
366 }
367
368 /**
369 * Emit addition instructions. Recall that a single TGSI_OPCODE_ADD
370 * becomes (up to) four SPU "fa" instructions because we're doing SOA
371 * processing.
372 */
373 static boolean
374 emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst)
375 {
376 int ch;
377 spe_comment(gen->f, -4, "ADD:");
378 /* Loop over Red/Green/Blue/Alpha channels */
379 for (ch = 0; ch < 4; ch++) {
380 /* If the dest R, G, B or A writemask is enabled... */
381 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
382 /* get indexes of the two src, one dest SPE registers */
383 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
384 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
385 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
386
387 /* Emit actual SPE instruction: d = s1 + s2 */
388 spe_fa(gen->f, d_reg, s1_reg, s2_reg);
389
390 /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
391 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
392 /* Free any intermediate temps we allocated */
393 free_itemps(gen);
394 }
395 }
396 return true;
397 }
398
399 /**
400 * Emit subtract. See emit_ADD for comments.
401 */
402 static boolean
403 emit_SUB(struct codegen *gen, const struct tgsi_full_instruction *inst)
404 {
405 int ch;
406 spe_comment(gen->f, -4, "SUB:");
407 /* Loop over Red/Green/Blue/Alpha channels */
408 for (ch = 0; ch < 4; ch++) {
409 /* If the dest R, G, B or A writemask is enabled... */
410 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
411 /* get indexes of the two src, one dest SPE registers */
412 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
413 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
414 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
415
416 /* Emit actual SPE instruction: d = s1 - s2 */
417 spe_fs(gen->f, d_reg, s1_reg, s2_reg);
418
419 /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
420 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
421 /* Free any intermediate temps we allocated */
422 free_itemps(gen);
423 }
424 }
425 return true;
426 }
427
428 /**
429 * Emit multiply add. See emit_ADD for comments.
430 */
431 static boolean
432 emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst)
433 {
434 int ch;
435 spe_comment(gen->f, -4, "MAD:");
436 for (ch = 0; ch < 4; ch++) {
437 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
438 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
439 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
440 int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
441 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
442 /* d = s1 * s2 + s3 */
443 spe_fma(gen->f, d_reg, s1_reg, s2_reg, s3_reg);
444 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
445 free_itemps(gen);
446 }
447 }
448 return true;
449 }
450
451
452 /**
453 * Emit linear interpolate. See emit_ADD for comments.
454 */
455 static boolean
456 emit_LERP(struct codegen *gen, const struct tgsi_full_instruction *inst)
457 {
458 int ch;
459 spe_comment(gen->f, -4, "LERP:");
460 for (ch = 0; ch < 4; ch++) {
461 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
462 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
463 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
464 int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
465 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
466 /* d = s3 + s1(s2 - s3) */
467 spe_fs(gen->f, d_reg, s2_reg, s3_reg);
468 spe_fma(gen->f, d_reg, d_reg, s1_reg, s3_reg);
469 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
470 free_itemps(gen);
471 }
472 }
473 return true;
474 }
475
476 /**
477 * Emit multiply. See emit_ADD for comments.
478 */
479 static boolean
480 emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst)
481 {
482 int ch;
483 spe_comment(gen->f, -4, "MUL:");
484 for (ch = 0; ch < 4; ch++) {
485 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
486 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
487 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
488 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
489 /* d = s1 * s2 */
490 spe_fm(gen->f, d_reg, s1_reg, s2_reg);
491 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
492 free_itemps(gen);
493 }
494 }
495 return true;
496 }
497
498 /**
499 * Emit reciprocal. See emit_ADD for comments.
500 */
501 static boolean
502 emit_RCP(struct codegen *gen, const struct tgsi_full_instruction *inst)
503 {
504 int ch;
505 spe_comment(gen->f, -4, "RCP:");
506 for (ch = 0; ch < 4; ch++) {
507 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
508 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
509 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
510 /* d = 1/s1 */
511 spe_frest(gen->f, d_reg, s1_reg);
512 spe_fi(gen->f, d_reg, s1_reg, d_reg);
513 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
514 free_itemps(gen);
515 }
516 }
517 return true;
518 }
519
520 /**
521 * Emit reciprocal sqrt. See emit_ADD for comments.
522 */
523 static boolean
524 emit_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst)
525 {
526 int ch;
527 spe_comment(gen->f, -4, "RSQ:");
528 for (ch = 0; ch < 4; ch++) {
529 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
530 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
531 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
532 /* d = 1/s1 */
533 spe_frsqest(gen->f, d_reg, s1_reg);
534 spe_fi(gen->f, d_reg, s1_reg, d_reg);
535 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
536 free_itemps(gen);
537 }
538 }
539 return true;
540 }
541
542 /**
543 * Emit absolute value. See emit_ADD for comments.
544 */
545 static boolean
546 emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst)
547 {
548 int ch;
549 spe_comment(gen->f, -4, "ABS:");
550 for (ch = 0; ch < 4; ch++) {
551 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
552 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
553 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
554 const int bit31mask_reg = get_itemp(gen);
555
556 /* mask with bit 31 set, the rest cleared */
557 spe_load_int(gen->f, bit31mask_reg, (1 << 31));
558
559 /* d = sign bit cleared in s1 */
560 spe_andc(gen->f, d_reg, s1_reg, bit31mask_reg);
561
562 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
563 free_itemps(gen);
564 }
565 }
566 return true;
567 }
568
569 /**
570 * Emit 3 component dot product. See emit_ADD for comments.
571 */
572 static boolean
573 emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst)
574 {
575 int ch;
576 spe_comment(gen->f, -4, "DP3:");
577
578 int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
579 int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
580 int tmp_reg = get_itemp(gen);
581 /* t = x0 * x1 */
582 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
583
584 s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
585 s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
586 /* t = y0 * y1 + t */
587 spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
588
589 s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
590 s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
591 /* t = z0 * z1 + t */
592 spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
593
594 for (ch = 0; ch < 4; ch++) {
595 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
596 store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]);
597 }
598 }
599
600 free_itemps(gen);
601 return true;
602 }
603
604 /**
605 * Emit 4 component dot product. See emit_ADD for comments.
606 */
607 static boolean
608 emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst)
609 {
610 int ch;
611 spe_comment(gen->f, -4, "DP4:");
612
613 int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
614 int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
615 int tmp_reg = get_itemp(gen);
616 /* t = x0 * x1 */
617 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
618
619 s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
620 s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
621 /* t = y0 * y1 + t */
622 spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
623
624 s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
625 s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
626 /* t = z0 * z1 + t */
627 spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
628
629 s1_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[0]);
630 s2_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]);
631 /* t = w0 * w1 + t */
632 spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
633
634 for (ch = 0; ch < 4; ch++) {
635 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
636 store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]);
637 }
638 }
639
640 free_itemps(gen);
641 return true;
642 }
643
644 /**
645 * Emit homogeneous dot product. See emit_ADD for comments.
646 */
647 static boolean
648 emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst)
649 {
650 int ch;
651 spe_comment(gen->f, -4, "DPH:");
652
653 int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
654 int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
655 int tmp_reg = get_itemp(gen);
656
657 /* t = x0 * x1 */
658 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
659
660 s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
661 s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
662 /* t = y0 * y1 + t */
663 spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
664
665 s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
666 s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
667 /* t = z0 * z1 + t */
668 spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
669
670 s2_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]);
671 /* t = w1 + t */
672 spe_fa(gen->f, tmp_reg, s2_reg, tmp_reg);
673
674 for (ch = 0; ch < 4; ch++) {
675 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
676 store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]);
677 }
678 }
679
680 free_itemps(gen);
681 return true;
682 }
683
684 /**
685 * Emit cross product. See emit_ADD for comments.
686 */
687 static boolean
688 emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst)
689 {
690 spe_comment(gen->f, -4, "XPD:");
691
692 int s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
693 int s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
694 int tmp_reg = get_itemp(gen);
695
696 /* t = z0 * y1 */
697 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
698
699 s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
700 s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
701 /* t = y0 * z1 - t */
702 spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
703
704 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_X)) {
705 store_dest_reg(gen, tmp_reg, CHAN_X, &inst->FullDstRegisters[0]);
706 }
707
708 s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
709 s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
710 /* t = x0 * z1 */
711 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
712
713 s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
714 s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
715 /* t = z0 * x1 - t */
716 spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
717
718 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Y)) {
719 store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->FullDstRegisters[0]);
720 }
721
722 s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
723 s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
724 /* t = y0 * x1 */
725 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
726
727 s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
728 s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
729 /* t = x0 * y1 - t */
730 spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
731
732 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Z)) {
733 store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->FullDstRegisters[0]);
734 }
735
736 free_itemps(gen);
737 return true;
738 }
739
740 /**
741 * Emit set-if-greater-than.
742 * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as
743 * the result but OpenGL/TGSI needs 0.0 and 1.0 results.
744 * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND.
745 */
746 static boolean
747 emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst)
748 {
749 int ch;
750
751 spe_comment(gen->f, -4, "SGT:");
752
753 for (ch = 0; ch < 4; ch++) {
754 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
755 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
756 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
757 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
758
759 /* d = (s1 > s2) */
760 spe_fcgt(gen->f, d_reg, s1_reg, s2_reg);
761
762 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
763 /* d = d & one_reg */
764 spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
765
766 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
767 free_itemps(gen);
768 }
769 }
770
771 return true;
772 }
773
774 /**
775 * Emit set-if_less-then. See emit_SGT for comments.
776 */
777 static boolean
778 emit_SLT(struct codegen *gen, const struct tgsi_full_instruction *inst)
779 {
780 int ch;
781
782 spe_comment(gen->f, -4, "SLT:");
783
784 for (ch = 0; ch < 4; ch++) {
785 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
786 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
787 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
788 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
789
790 /* d = (s1 < s2) */
791 spe_fcgt(gen->f, d_reg, s2_reg, s1_reg);
792
793 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
794 /* d = d & one_reg */
795 spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
796
797 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
798 free_itemps(gen);
799 }
800 }
801
802 return true;
803 }
804
805 /**
806 * Emit set-if_greater-then-or-equal. See emit_SGT for comments.
807 */
808 static boolean
809 emit_SGE(struct codegen *gen, const struct tgsi_full_instruction *inst)
810 {
811 int ch;
812
813 spe_comment(gen->f, -4, "SGE:");
814
815 for (ch = 0; ch < 4; ch++) {
816 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
817 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
818 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
819 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
820
821 /* d = (s1 >= s2) */
822 spe_fcgt(gen->f, d_reg, s2_reg, s1_reg);
823
824 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
825 /* d = ~d & one_reg */
826 spe_andc(gen->f, d_reg, get_const_one_reg(gen), d_reg);
827
828 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
829 free_itemps(gen);
830 }
831 }
832
833 return true;
834 }
835
836 /**
837 * Emit set-if_less-then-or-equal. See emit_SGT for comments.
838 */
839 static boolean
840 emit_SLE(struct codegen *gen, const struct tgsi_full_instruction *inst)
841 {
842 int ch;
843
844 spe_comment(gen->f, -4, "SLE:");
845
846 for (ch = 0; ch < 4; ch++) {
847 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
848 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
849 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
850 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
851
852 /* d = (s1 <= s2) */
853 spe_fcgt(gen->f, d_reg, s1_reg, s2_reg);
854
855 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
856 /* d = ~d & one_reg */
857 spe_andc(gen->f, d_reg, get_const_one_reg(gen), d_reg);
858
859 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
860 free_itemps(gen);
861 }
862 }
863
864 return true;
865 }
866
867 /**
868 * Emit set-if_equal. See emit_SGT for comments.
869 */
870 static boolean
871 emit_SEQ(struct codegen *gen, const struct tgsi_full_instruction *inst)
872 {
873 int ch;
874
875 spe_comment(gen->f, -4, "SEQ:");
876
877 for (ch = 0; ch < 4; ch++) {
878 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
879 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
880 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
881 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
882
883 /* d = (s1 == s2) */
884 spe_fceq(gen->f, d_reg, s1_reg, s2_reg);
885
886 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
887 /* d = d & one_reg */
888 spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
889
890 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
891 free_itemps(gen);
892 }
893 }
894
895 return true;
896 }
897
898 /**
899 * Emit set-if_not_equal. See emit_SGT for comments.
900 */
901 static boolean
902 emit_SNE(struct codegen *gen, const struct tgsi_full_instruction *inst)
903 {
904 int ch;
905
906 spe_comment(gen->f, -4, "SNE:");
907
908 for (ch = 0; ch < 4; ch++) {
909 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
910 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
911 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
912 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
913
914 /* d = (s1 != s2) */
915 spe_fceq(gen->f, d_reg, s1_reg, s2_reg);
916 spe_nor(gen->f, d_reg, d_reg, d_reg);
917
918 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
919 /* d = d & one_reg */
920 spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
921
922 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
923 free_itemps(gen);
924 }
925 }
926
927 return true;
928 }
929
930 /**
931 * Emit compare. See emit_SGT for comments.
932 */
933 static boolean
934 emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst)
935 {
936 int ch;
937
938 spe_comment(gen->f, -4, "CMP:");
939
940 for (ch = 0; ch < 4; ch++) {
941 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
942 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
943 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
944 int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
945 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
946 int zero_reg = get_itemp(gen);
947
948 spe_xor(gen->f, zero_reg, zero_reg, zero_reg);
949
950 /* d = (s1 < 0) ? s2 : s3 */
951 spe_fcgt(gen->f, d_reg, zero_reg, s1_reg);
952 spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg);
953
954 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
955 free_itemps(gen);
956 }
957 }
958
959 return true;
960 }
961
962 /**
963 * Emit trunc.
964 * Convert float to signed int
965 * Convert signed int to float
966 */
967 static boolean
968 emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst)
969 {
970 int ch;
971
972 spe_comment(gen->f, -4, "TRUNC:");
973
974 for (ch = 0; ch < 4; ch++) {
975 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
976 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
977 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
978
979 /* Convert float to int */
980 spe_cflts(gen->f, d_reg, s1_reg, 0);
981
982 /* Convert int to float */
983 spe_csflt(gen->f, d_reg, d_reg, 0);
984
985 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
986 free_itemps(gen);
987 }
988 }
989
990 return true;
991 }
992
993 /**
994 * Emit floor.
995 * If negative int subtract one
996 * Convert float to signed int
997 * Convert signed int to float
998 */
999 static boolean
1000 emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst)
1001 {
1002 int ch;
1003
1004 spe_comment(gen->f, -4, "FLR:");
1005
1006 int zero_reg = get_itemp(gen);
1007 spe_xor(gen->f, zero_reg, zero_reg, zero_reg);
1008
1009 for (ch = 0; ch < 4; ch++) {
1010 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1011 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1012 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1013 int tmp_reg = get_itemp(gen);
1014
1015 /* If negative, subtract 1.0 */
1016 spe_fcgt(gen->f, d_reg, zero_reg, s1_reg);
1017 spe_selb(gen->f, tmp_reg, zero_reg, get_const_one_reg(gen), d_reg);
1018 spe_fs(gen->f, d_reg, s1_reg, tmp_reg);
1019
1020 /* Convert float to int */
1021 spe_cflts(gen->f, d_reg, d_reg, 0);
1022
1023 /* Convert int to float */
1024 spe_csflt(gen->f, d_reg, d_reg, 0);
1025
1026 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1027 free_itemps(gen);
1028 }
1029 }
1030
1031 return true;
1032 }
1033
1034 /**
1035 * Emit frac.
1036 * Input - FLR(Input)
1037 */
1038 static boolean
1039 emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst)
1040 {
1041 int ch;
1042
1043 spe_comment(gen->f, -4, "FLR:");
1044
1045 int zero_reg = get_itemp(gen);
1046 spe_xor(gen->f, zero_reg, zero_reg, zero_reg);
1047
1048 for (ch = 0; ch < 4; ch++) {
1049 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1050 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1051 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1052 int tmp_reg = get_itemp(gen);
1053
1054 /* If negative, subtract 1.0 */
1055 spe_fcgt(gen->f, d_reg, zero_reg, s1_reg);
1056 spe_selb(gen->f, tmp_reg, zero_reg, get_const_one_reg(gen), d_reg);
1057 spe_fs(gen->f, d_reg, s1_reg, tmp_reg);
1058
1059 /* Convert float to int */
1060 spe_cflts(gen->f, d_reg, d_reg, 0);
1061
1062 /* Convert int to float */
1063 spe_csflt(gen->f, d_reg, d_reg, 0);
1064
1065 /* d = s1 - FLR(s1) */
1066 spe_fs(gen->f, d_reg, s1_reg, d_reg);
1067
1068 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1069 free_itemps(gen);
1070 }
1071 }
1072
1073 return true;
1074 }
1075
1076
#if 0
/**
 * Debug helper (compiled out): list every entry of the context's SPU
 * function table as index, name and address.
 */
static void
print_functions(struct cell_context *cell)
{
   struct cell_spu_function_info *table = &cell->spu_functions;
   uint n = 0;

   while (n < table->num) {
      printf("SPU func %u: %s at %u\n",
             n, table->names[n], table->addrs[n]);
      n++;
   }
}
#endif
1089
1090
1091 /**
1092 * Emit code to call a SPU function.
1093 * Used to implement instructions like SIN/COS/POW/TEX/etc.
1094 */
1095 static boolean
1096 emit_function_call(struct codegen *gen,
1097 const struct tgsi_full_instruction *inst,
1098 char *funcname, uint num_args)
1099 {
1100 const struct cell_spu_function_info *funcs = &gen->cell->spu_functions;
1101 char comment[100];
1102 uint addr;
1103 int ch;
1104
1105 /* XXX temporary value */
1106 const int frameSize = 64; /* stack frame (activation record) size */
1107
1108 assert(num_args <= 3);
1109
1110 /* lookup function address */
1111 {
1112 uint i;
1113 addr = 0;
1114 for (i = 0; i < funcs->num; i++) {
1115 if (strcmp(funcs->names[i], funcname) == 0) {
1116 addr = funcs->addrs[i];
1117 }
1118 }
1119 assert(addr && "spu function not found");
1120 }
1121
1122 addr /= 4; /* discard 2 least significant bits */
1123
1124 snprintf(comment, sizeof(comment), "CALL %s:", funcname);
1125 spe_comment(gen->f, -4, comment);
1126
1127 for (ch = 0; ch < 4; ch++) {
1128 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1129 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1130 int s_regs[3];
1131 uint a;
1132 for (a = 0; a < num_args; a++) {
1133 s_regs[a] = get_src_reg(gen, ch, &inst->FullSrcRegisters[a]);
1134 }
1135
1136 /* Basically:
1137 * save registers on stack
1138 * move parameters to registers 3, 4, 5...
1139 * call function
1140 * save return value (reg 3)
1141 * restore registers from stack
1142 */
1143
1144 /* XXX hack: load first function param */
1145 spe_move(gen->f, 3, s_regs[0]);
1146
1147 /* save $lr on stack # stqd $lr,16($sp) */
1148 spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16);
1149 /* save stack pointer # stqd $sp,-frameSize($sp) */
1150 spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -frameSize);
1151
1152 /* XXX save registers to stack here */
1153
1154 /* adjust stack pointer # ai $sp,$sp,-frameSize */
1155 spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -frameSize);
1156
1157 /* branch to function, save return addr */
1158 spe_brasl(gen->f, SPE_REG_RA, addr);
1159
1160 /* restore stack pointer # ai $sp,$sp,frameSize */
1161 spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, frameSize);
1162
1163 /* XXX restore registers from stack here */
1164
1165 /* restore $lr # lqd $lr,16($sp) */
1166 spe_lqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16);
1167
1168 /* XXX hack: save function's return value */
1169 spe_move(gen->f, d_reg, 3);
1170
1171 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1172 free_itemps(gen);
1173 }
1174 }
1175
1176 return true;
1177 }
1178
1179
1180 /**
1181 * Emit max. See emit_SGT for comments.
1182 */
1183 static boolean
1184 emit_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst)
1185 {
1186 int ch;
1187
1188 spe_comment(gen->f, -4, "MAX:");
1189
1190 for (ch = 0; ch < 4; ch++) {
1191 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1192 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1193 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
1194 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1195
1196 /* d = (s1 > s2) ? s1 : s2 */
1197 spe_fcgt(gen->f, d_reg, s1_reg, s2_reg);
1198 spe_selb(gen->f, d_reg, s2_reg, s1_reg, d_reg);
1199
1200 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1201 free_itemps(gen);
1202 }
1203 }
1204
1205 return true;
1206 }
1207
1208 /**
1209 * Emit max. See emit_SGT for comments.
1210 */
1211 static boolean
1212 emit_MIN(struct codegen *gen, const struct tgsi_full_instruction *inst)
1213 {
1214 int ch;
1215
1216 spe_comment(gen->f, -4, "MIN:");
1217
1218 for (ch = 0; ch < 4; ch++) {
1219 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1220 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1221 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
1222 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1223
1224 /* d = (s2 > s1) ? s1 : s2 */
1225 spe_fcgt(gen->f, d_reg, s2_reg, s1_reg);
1226 spe_selb(gen->f, d_reg, s2_reg, s1_reg, d_reg);
1227
1228 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1229 free_itemps(gen);
1230 }
1231 }
1232
1233 return true;
1234 }
1235
1236 static boolean
1237 emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst)
1238 {
1239 const int channel = 0;
1240 const int exec_reg = get_exec_mask_reg(gen);
1241
1242 spe_comment(gen->f, -4, "IF:");
1243
1244 /* update execution mask with the predicate register */
1245 int tmp_reg = get_itemp(gen);
1246 int s1_reg = get_src_reg(gen, channel, &inst->FullSrcRegisters[0]);
1247
1248 /* tmp = (s1_reg == 0) */
1249 spe_ceqi(gen->f, tmp_reg, s1_reg, 0);
1250 /* tmp = !tmp */
1251 spe_complement(gen->f, tmp_reg, tmp_reg);
1252 /* exec_mask = exec_mask & tmp */
1253 spe_and(gen->f, exec_reg, exec_reg, tmp_reg);
1254
1255 gen->if_nesting++;
1256
1257 free_itemps(gen);
1258
1259 return true;
1260 }
1261
1262
1263 static boolean
1264 emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst)
1265 {
1266 const int exec_reg = get_exec_mask_reg(gen);
1267
1268 spe_comment(gen->f, -4, "ELSE:");
1269
1270 /* exec_mask = !exec_mask */
1271 spe_complement(gen->f, exec_reg, exec_reg);
1272
1273 return true;
1274 }
1275
1276
1277 static boolean
1278 emit_ENDIF(struct codegen *gen, const struct tgsi_full_instruction *inst)
1279 {
1280 const int exec_reg = get_exec_mask_reg(gen);
1281
1282 spe_comment(gen->f, -4, "ENDIF:");
1283
1284 /* XXX todo: pop execution mask */
1285
1286 spe_load_int(gen->f, exec_reg, ~0x0);
1287
1288 gen->if_nesting--;
1289 return true;
1290 }
1291
1292
1293 static boolean
1294 emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst,
1295 boolean ddx)
1296 {
1297 int ch;
1298
1299 spe_comment(gen->f, -4, ddx ? "DDX:" : "DDY:");
1300
1301 for (ch = 0; ch < 4; ch++) {
1302 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1303 int s_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1304 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1305
1306 int t1_reg = get_itemp(gen);
1307 int t2_reg = get_itemp(gen);
1308
1309 spe_splat_word(gen->f, t1_reg, s_reg, 0); /* upper-left pixel */
1310 if (ddx) {
1311 spe_splat_word(gen->f, t2_reg, s_reg, 1); /* upper-right pixel */
1312 }
1313 else {
1314 spe_splat_word(gen->f, t2_reg, s_reg, 2); /* lower-left pixel */
1315 }
1316 spe_fs(gen->f, d_reg, t2_reg, t1_reg);
1317
1318 free_itemps(gen);
1319 }
1320 }
1321
1322 return true;
1323 }
1324
1325
1326
1327
1328 /**
1329 * Emit END instruction.
1330 * We just return from the shader function at this point.
1331 *
1332 * Note that there may be more code after this that would be
1333 * called by TGSI_OPCODE_CALL.
1334 */
1335 static boolean
1336 emit_END(struct codegen *gen)
1337 {
1338 spe_comment(gen->f, -4, "END:");
1339 /* return from function call */
1340 spe_bi(gen->f, SPE_REG_RA, 0, 0);
1341 return true;
1342 }
1343
1344
1345 /**
1346 * Emit code for the given instruction. Just a big switch stmt.
1347 */
1348 static boolean
1349 emit_instruction(struct codegen *gen,
1350 const struct tgsi_full_instruction *inst)
1351 {
1352 switch (inst->Instruction.Opcode) {
1353 case TGSI_OPCODE_MOV:
1354 case TGSI_OPCODE_SWZ:
1355 return emit_MOV(gen, inst);
1356 case TGSI_OPCODE_MUL:
1357 return emit_MUL(gen, inst);
1358 case TGSI_OPCODE_ADD:
1359 return emit_ADD(gen, inst);
1360 case TGSI_OPCODE_SUB:
1361 return emit_SUB(gen, inst);
1362 case TGSI_OPCODE_MAD:
1363 return emit_MAD(gen, inst);
1364 case TGSI_OPCODE_LERP:
1365 return emit_LERP(gen, inst);
1366 case TGSI_OPCODE_DP3:
1367 return emit_DP3(gen, inst);
1368 case TGSI_OPCODE_DP4:
1369 return emit_DP4(gen, inst);
1370 case TGSI_OPCODE_DPH:
1371 return emit_DPH(gen, inst);
1372 case TGSI_OPCODE_XPD:
1373 return emit_XPD(gen, inst);
1374 case TGSI_OPCODE_RCP:
1375 return emit_RCP(gen, inst);
1376 case TGSI_OPCODE_RSQ:
1377 return emit_RSQ(gen, inst);
1378 case TGSI_OPCODE_ABS:
1379 return emit_ABS(gen, inst);
1380 case TGSI_OPCODE_SGT:
1381 return emit_SGT(gen, inst);
1382 case TGSI_OPCODE_SLT:
1383 return emit_SLT(gen, inst);
1384 case TGSI_OPCODE_SGE:
1385 return emit_SGE(gen, inst);
1386 case TGSI_OPCODE_SLE:
1387 return emit_SLE(gen, inst);
1388 case TGSI_OPCODE_SEQ:
1389 return emit_SEQ(gen, inst);
1390 case TGSI_OPCODE_SNE:
1391 return emit_SNE(gen, inst);
1392 case TGSI_OPCODE_CMP:
1393 return emit_CMP(gen, inst);
1394 case TGSI_OPCODE_MAX:
1395 return emit_MAX(gen, inst);
1396 case TGSI_OPCODE_MIN:
1397 return emit_MIN(gen, inst);
1398 case TGSI_OPCODE_TRUNC:
1399 return emit_TRUNC(gen, inst);
1400 case TGSI_OPCODE_FLR:
1401 return emit_FLR(gen, inst);
1402 case TGSI_OPCODE_FRC:
1403 return emit_FRC(gen, inst);
1404 case TGSI_OPCODE_END:
1405 return emit_END(gen);
1406
1407 case TGSI_OPCODE_COS:
1408 return emit_function_call(gen, inst, "spu_cos", 1);
1409 case TGSI_OPCODE_SIN:
1410 return emit_function_call(gen, inst, "spu_sin", 1);
1411 case TGSI_OPCODE_POW:
1412 return emit_function_call(gen, inst, "spu_pow", 2);
1413
1414 case TGSI_OPCODE_IF:
1415 return emit_IF(gen, inst);
1416 case TGSI_OPCODE_ELSE:
1417 return emit_ELSE(gen, inst);
1418 case TGSI_OPCODE_ENDIF:
1419 return emit_ENDIF(gen, inst);
1420
1421 case TGSI_OPCODE_DDX:
1422 return emit_DDX_DDY(gen, inst, true);
1423 case TGSI_OPCODE_DDY:
1424 return emit_DDX_DDY(gen, inst, false);
1425
1426 /* XXX lots more cases to do... */
1427
1428 default:
1429 fprintf(stderr, "Cell: unimplemented TGSI instruction %d!\n",
1430 inst->Instruction.Opcode);
1431 return false;
1432 }
1433
1434 return true;
1435 }
1436
1437
1438
1439 /**
1440 * Emit code for a TGSI immediate value (vector of four floats).
1441 * This involves register allocation and initialization.
1442 * XXX the initialization should be done by a "prepare" stage, not
1443 * per quad execution!
1444 */
1445 static boolean
1446 emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed)
1447 {
1448 int ch;
1449
1450 assert(gen->num_imm < MAX_TEMPS);
1451
1452 spe_comment(gen->f, -4, "IMMEDIATE:");
1453
1454 for (ch = 0; ch < 4; ch++) {
1455 float val = immed->u.ImmediateFloat32[ch].Float;
1456 int reg = spe_allocate_available_register(gen->f);
1457
1458 if (reg < 0)
1459 return false;
1460
1461 /* update immediate map */
1462 gen->imm_regs[gen->num_imm][ch] = reg;
1463
1464 /* emit initializer instruction */
1465 spe_load_float(gen->f, reg, val);
1466 }
1467
1468 gen->num_imm++;
1469
1470 return true;
1471 }
1472
1473
1474
1475 /**
1476 * Emit "code" for a TGSI declaration.
1477 * We only care about TGSI TEMPORARY register declarations at this time.
1478 * For each TGSI TEMPORARY we allocate four SPE registers.
1479 */
1480 static boolean
1481 emit_declaration(struct cell_context *cell,
1482 struct codegen *gen, const struct tgsi_full_declaration *decl)
1483 {
1484 int i, ch;
1485
1486 switch (decl->Declaration.File) {
1487 case TGSI_FILE_TEMPORARY:
1488 if (cell->debug_flags & CELL_DEBUG_ASM) {
1489 printf("Declare temp reg %d .. %d\n",
1490 decl->DeclarationRange.First,
1491 decl->DeclarationRange.Last);
1492 }
1493
1494 for (i = decl->DeclarationRange.First;
1495 i <= decl->DeclarationRange.Last;
1496 i++) {
1497 assert(i < MAX_TEMPS);
1498 for (ch = 0; ch < 4; ch++) {
1499 gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f);
1500 if (gen->temp_regs[i][ch] < 0)
1501 return false; /* out of regs */
1502 }
1503
1504 /* XXX if we run out of SPE registers, we need to spill
1505 * to SPU memory. someday...
1506 */
1507
1508 if (cell->debug_flags & CELL_DEBUG_ASM) {
1509 printf(" SPE regs: %d %d %d %d\n",
1510 gen->temp_regs[i][0],
1511 gen->temp_regs[i][1],
1512 gen->temp_regs[i][2],
1513 gen->temp_regs[i][3]);
1514 }
1515 }
1516 break;
1517 default:
1518 ; /* ignore */
1519 }
1520
1521 return true;
1522 }
1523
1524
1525 /**
1526 * Translate TGSI shader code to SPE instructions. This is done when
1527 * the state tracker gives us a new shader (via pipe->create_fs_state()).
1528 *
1529 * \param cell the rendering context (in)
1530 * \param tokens the TGSI shader (in)
1531 * \param f the generated function (out)
1532 */
1533 boolean
1534 cell_gen_fragment_program(struct cell_context *cell,
1535 const struct tgsi_token *tokens,
1536 struct spe_function *f)
1537 {
1538 struct tgsi_parse_context parse;
1539 struct codegen gen;
1540
1541 memset(&gen, 0, sizeof(gen));
1542 gen.cell = cell;
1543 gen.f = f;
1544
1545 /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
1546 gen.inputs_reg = 3; /* pointer to inputs array */
1547 gen.outputs_reg = 4; /* pointer to outputs array */
1548 gen.constants_reg = 5; /* pointer to constants array */
1549
1550 spe_init_func(f, SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE);
1551 spe_allocate_register(f, gen.inputs_reg);
1552 spe_allocate_register(f, gen.outputs_reg);
1553 spe_allocate_register(f, gen.constants_reg);
1554
1555 if (cell->debug_flags & CELL_DEBUG_ASM) {
1556 spe_print_code(f, true);
1557 spe_indent(f, 8);
1558 printf("Begin %s\n", __FUNCTION__);
1559 tgsi_dump(tokens, 0);
1560 }
1561
1562 tgsi_parse_init(&parse, tokens);
1563
1564 while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) {
1565 tgsi_parse_token(&parse);
1566
1567 switch (parse.FullToken.Token.Type) {
1568 case TGSI_TOKEN_TYPE_IMMEDIATE:
1569 if (!emit_immediate(&gen, &parse.FullToken.FullImmediate))
1570 gen.error = true;
1571 break;
1572
1573 case TGSI_TOKEN_TYPE_DECLARATION:
1574 if (!emit_declaration(cell, &gen, &parse.FullToken.FullDeclaration))
1575 gen.error = true;
1576 break;
1577
1578 case TGSI_TOKEN_TYPE_INSTRUCTION:
1579 if (!emit_instruction(&gen, &parse.FullToken.FullInstruction))
1580 gen.error = true;
1581 break;
1582
1583 default:
1584 assert(0);
1585 }
1586 }
1587
1588
1589 if (gen.error) {
1590 /* terminate the SPE code */
1591 return emit_END(&gen);
1592 }
1593
1594 if (cell->debug_flags & CELL_DEBUG_ASM) {
1595 printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst);
1596 printf("End %s\n", __FUNCTION__);
1597 }
1598
1599 tgsi_parse_free( &parse );
1600
1601 return !gen.error;
1602 }