Merge commit 'origin/gallium-0.1' into gallium-0.2
[mesa.git] / src / gallium / drivers / cell / ppu / cell_gen_fp.c
1 /**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29
30 /**
31 * Generate SPU fragment program/shader code.
32 *
33 * Note that we generate SOA-style code here. So each TGSI instruction
34 * operates on four pixels (and is translated into four SPU instructions,
35 * generally speaking).
36 *
37 * \author Brian Paul
38 */
39
40 #include <math.h>
41 #include "pipe/p_defines.h"
42 #include "pipe/p_state.h"
43 #include "pipe/p_shader_tokens.h"
44 #include "tgsi/tgsi_parse.h"
45 #include "tgsi/tgsi_util.h"
46 #include "tgsi/tgsi_exec.h"
47 #include "tgsi/tgsi_dump.h"
48 #include "rtasm/rtasm_ppc_spe.h"
49 #include "util/u_memory.h"
50 #include "cell_context.h"
51 #include "cell_gen_fp.h"
52
53
/** Number of rows in the codegen temp_regs[] table (TGSI temporaries) */
#define MAX_TEMPS   16
/** Number of rows in the codegen imm_regs[] table (TGSI immediates) */
#define MAX_IMMED   8

/** Channel indexes, used for write-mask tests and per-channel lookups */
#define CHAN_X      0
#define CHAN_Y      1
#define CHAN_Z      2
#define CHAN_W      3
61
62 /**
63 * Context needed during code generation.
64 */
65 struct codegen
66 {
67 struct cell_context *cell;
68 int inputs_reg; /**< 1st function parameter */
69 int outputs_reg; /**< 2nd function parameter */
70 int constants_reg; /**< 3rd function parameter */
71 int temp_regs[MAX_TEMPS][4]; /**< maps TGSI temps to SPE registers */
72 int imm_regs[MAX_IMMED][4]; /**< maps TGSI immediates to SPE registers */
73
74 int num_imm; /**< number of immediates */
75
76 int one_reg; /**< register containing {1.0, 1.0, 1.0, 1.0} */
77
78 /** Per-instruction temps / intermediate temps */
79 int num_itemps;
80 int itemps[12];
81
82 /** Current IF/ELSE/ENDIF nesting level */
83 int if_nesting;
84 /** Index of execution mask register */
85 int exec_mask_reg;
86
87 /** KIL mask: indicates which fragments have been killed */
88 int kill_mask_reg;
89
90 int frame_size; /**< Stack frame size, in words */
91
92 struct spe_function *f;
93 boolean error;
94 };
95
96
97 /**
98 * Allocate an intermediate temporary register.
99 */
100 static int
101 get_itemp(struct codegen *gen)
102 {
103 int t = spe_allocate_available_register(gen->f);
104 assert(gen->num_itemps < Elements(gen->itemps));
105 gen->itemps[gen->num_itemps++] = t;
106 return t;
107 }
108
109 /**
110 * Free all intermediate temporary registers. To be called after each
111 * instruction has been emitted.
112 */
113 static void
114 free_itemps(struct codegen *gen)
115 {
116 int i;
117 for (i = 0; i < gen->num_itemps; i++) {
118 spe_release_register(gen->f, gen->itemps[i]);
119 }
120 gen->num_itemps = 0;
121 }
122
123
124 /**
125 * Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}.
126 * The register is allocated and initialized upon the first call.
127 */
128 static int
129 get_const_one_reg(struct codegen *gen)
130 {
131 if (gen->one_reg <= 0) {
132 gen->one_reg = spe_allocate_available_register(gen->f);
133
134 spe_indent(gen->f, 4);
135 spe_comment(gen->f, -4, "INIT CONSTANT 1.0:");
136
137 /* one = {1.0, 1.0, 1.0, 1.0} */
138 spe_load_float(gen->f, gen->one_reg, 1.0f);
139
140 spe_indent(gen->f, -4);
141 }
142
143 return gen->one_reg;
144 }
145
146
147 /**
148 * Return index of the pixel execution mask.
149 * The register is allocated an initialized upon the first call.
150 *
151 * The pixel execution mask controls which pixels in a quad are
152 * modified, according to surrounding conditionals, loops, etc.
153 */
154 static int
155 get_exec_mask_reg(struct codegen *gen)
156 {
157 if (gen->exec_mask_reg <= 0) {
158 gen->exec_mask_reg = spe_allocate_available_register(gen->f);
159
160 spe_indent(gen->f, 4);
161 spe_comment(gen->f, -4, "INIT EXEC MASK = ~0:");
162
163 /* exec_mask = {~0, ~0, ~0, ~0} */
164 spe_load_int(gen->f, gen->exec_mask_reg, ~0);
165
166 spe_indent(gen->f, -4);
167 }
168
169 return gen->exec_mask_reg;
170 }
171
172
173 static boolean
174 is_register_src(struct codegen *gen, int channel,
175 const struct tgsi_full_src_register *src)
176 {
177 int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel);
178 int sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel);
179
180 if (swizzle > TGSI_SWIZZLE_W || sign_op != TGSI_UTIL_SIGN_KEEP) {
181 return FALSE;
182 }
183 if (src->SrcRegister.File == TGSI_FILE_TEMPORARY ||
184 src->SrcRegister.File == TGSI_FILE_IMMEDIATE) {
185 return TRUE;
186 }
187 return FALSE;
188 }
189
190
191 static boolean
192 is_memory_dst(struct codegen *gen, int channel,
193 const struct tgsi_full_dst_register *dst)
194 {
195 if (dst->DstRegister.File == TGSI_FILE_OUTPUT) {
196 return TRUE;
197 }
198 else {
199 return FALSE;
200 }
201 }
202
203
204 /**
205 * Return the index of the SPU temporary containing the named TGSI
206 * source register. If the TGSI register is a TGSI_FILE_TEMPORARY we
207 * just return the corresponding SPE register. If the TGIS register
208 * is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register
209 * and emit an SPE load instruction.
210 */
211 static int
212 get_src_reg(struct codegen *gen,
213 int channel,
214 const struct tgsi_full_src_register *src)
215 {
216 int reg = -1;
217 int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel);
218 boolean reg_is_itemp = FALSE;
219 uint sign_op;
220
221 assert(swizzle >= TGSI_SWIZZLE_X);
222 assert(swizzle <= TGSI_EXTSWIZZLE_ONE);
223
224 if (swizzle == TGSI_EXTSWIZZLE_ONE) {
225 /* Load const one float and early out */
226 reg = get_const_one_reg(gen);
227 }
228 else if (swizzle == TGSI_EXTSWIZZLE_ZERO) {
229 /* Load const zero float and early out */
230 reg = get_itemp(gen);
231 spe_xor(gen->f, reg, reg, reg);
232 }
233 else {
234 assert(swizzle < 4);
235
236 switch (src->SrcRegister.File) {
237 case TGSI_FILE_TEMPORARY:
238 reg = gen->temp_regs[src->SrcRegister.Index][swizzle];
239 break;
240 case TGSI_FILE_INPUT:
241 {
242 /* offset is measured in quadwords, not bytes */
243 int offset = src->SrcRegister.Index * 4 + swizzle;
244 reg = get_itemp(gen);
245 reg_is_itemp = TRUE;
246 /* Load: reg = memory[(machine_reg) + offset] */
247 spe_lqd(gen->f, reg, gen->inputs_reg, offset * 16);
248 }
249 break;
250 case TGSI_FILE_IMMEDIATE:
251 reg = gen->imm_regs[src->SrcRegister.Index][swizzle];
252 break;
253 case TGSI_FILE_CONSTANT:
254 {
255 /* offset is measured in quadwords, not bytes */
256 int offset = src->SrcRegister.Index * 4 + swizzle;
257 reg = get_itemp(gen);
258 reg_is_itemp = TRUE;
259 /* Load: reg = memory[(machine_reg) + offset] */
260 spe_lqd(gen->f, reg, gen->constants_reg, offset * 16);
261 }
262 break;
263 default:
264 assert(0);
265 }
266 }
267
268 /*
269 * Handle absolute value, negate or set-negative of src register.
270 */
271 sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel);
272 if (sign_op != TGSI_UTIL_SIGN_KEEP) {
273 /*
274 * All sign ops are done by manipulating bit 31, the IEEE float sign bit.
275 */
276 const int bit31mask_reg = get_itemp(gen);
277 int result_reg;
278
279 if (reg_is_itemp) {
280 /* re-use 'reg' for the result */
281 result_reg = reg;
282 }
283 else {
284 /* alloc a new reg for the result */
285 result_reg = get_itemp(gen);
286 }
287
288 /* mask with bit 31 set, the rest cleared */
289 spe_load_uint(gen->f, bit31mask_reg, (1 << 31));
290
291 if (sign_op == TGSI_UTIL_SIGN_CLEAR) {
292 spe_andc(gen->f, result_reg, reg, bit31mask_reg);
293 }
294 else if (sign_op == TGSI_UTIL_SIGN_SET) {
295 spe_and(gen->f, result_reg, reg, bit31mask_reg);
296 }
297 else {
298 assert(sign_op == TGSI_UTIL_SIGN_TOGGLE);
299 spe_xor(gen->f, result_reg, reg, bit31mask_reg);
300 }
301
302 reg = result_reg;
303 }
304
305 return reg;
306 }
307
308
309 /**
310 * Return the index of an SPE register to use for the given TGSI register.
311 * If the TGSI register is TGSI_FILE_TEMPORARAY, the index of the
312 * corresponding SPE register is returned. If the TGSI register is
313 * TGSI_FILE_OUTPUT we allocate an intermediate temporary register.
314 * See store_dest_reg() below...
315 */
316 static int
317 get_dst_reg(struct codegen *gen,
318 int channel,
319 const struct tgsi_full_dst_register *dest)
320 {
321 int reg = -1;
322
323 switch (dest->DstRegister.File) {
324 case TGSI_FILE_TEMPORARY:
325 if (gen->if_nesting > 0)
326 reg = get_itemp(gen);
327 else
328 reg = gen->temp_regs[dest->DstRegister.Index][channel];
329 break;
330 case TGSI_FILE_OUTPUT:
331 reg = get_itemp(gen);
332 break;
333 default:
334 assert(0);
335 }
336
337 return reg;
338 }
339
340
341 /**
342 * When a TGSI instruction is writing to an output register, this
343 * function emits the SPE store instruction to store the value_reg.
344 * \param value_reg the SPE register containing the value to store.
345 * This would have been returned by get_dst_reg().
346 */
347 static void
348 store_dest_reg(struct codegen *gen,
349 int value_reg, int channel,
350 const struct tgsi_full_dst_register *dest)
351 {
352 /*
353 * XXX need to implement dst reg clamping/saturation
354 */
355 #if 0
356 switch (inst->Instruction.Saturate) {
357 case TGSI_SAT_NONE:
358 break;
359 case TGSI_SAT_ZERO_ONE:
360 break;
361 case TGSI_SAT_MINUS_PLUS_ONE:
362 break;
363 default:
364 assert( 0 );
365 }
366 #endif
367
368 switch (dest->DstRegister.File) {
369 case TGSI_FILE_TEMPORARY:
370 if (gen->if_nesting > 0) {
371 int d_reg = gen->temp_regs[dest->DstRegister.Index][channel];
372 int exec_reg = get_exec_mask_reg(gen);
373 /* Mix d with new value according to exec mask:
374 * d[i] = mask_reg[i] ? value_reg : d_reg
375 */
376 spe_selb(gen->f, d_reg, d_reg, value_reg, exec_reg);
377 }
378 else {
379 /* we're not inside a condition or loop: do nothing special */
380
381 }
382 break;
383 case TGSI_FILE_OUTPUT:
384 {
385 /* offset is measured in quadwords, not bytes */
386 int offset = dest->DstRegister.Index * 4 + channel;
387 if (gen->if_nesting > 0) {
388 int exec_reg = get_exec_mask_reg(gen);
389 int curval_reg = get_itemp(gen);
390 /* First read the current value from memory:
391 * Load: curval = memory[(machine_reg) + offset]
392 */
393 spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset * 16);
394 /* Mix curval with newvalue according to exec mask:
395 * d[i] = mask_reg[i] ? value_reg : d_reg
396 */
397 spe_selb(gen->f, curval_reg, curval_reg, value_reg, exec_reg);
398 /* Store: memory[(machine_reg) + offset] = curval */
399 spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset * 16);
400 }
401 else {
402 /* Store: memory[(machine_reg) + offset] = reg */
403 spe_stqd(gen->f, value_reg, gen->outputs_reg, offset * 16);
404 }
405 }
406 break;
407 default:
408 assert(0);
409 }
410 }
411
412
413
414 static void
415 emit_prologue(struct codegen *gen)
416 {
417 gen->frame_size = 1024; /* XXX temporary, should be dynamic */
418
419 spe_comment(gen->f, -4, "Function prologue:");
420
421 /* save $lr on stack # stqd $lr,16($sp) */
422 spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16);
423
424 if (gen->frame_size >= 512) {
425 /* offset is too large for ai instruction */
426 int offset_reg = spe_allocate_available_register(gen->f);
427 int sp_reg = spe_allocate_available_register(gen->f);
428 /* offset = -framesize */
429 spe_load_int(gen->f, offset_reg, -gen->frame_size);
430 /* sp = $sp */
431 spe_move(gen->f, sp_reg, SPE_REG_SP);
432 /* $sp = $sp + offset_reg */
433 spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg);
434 /* save $sp in stack frame */
435 spe_stqd(gen->f, sp_reg, SPE_REG_SP, 0);
436 /* clean up */
437 spe_release_register(gen->f, offset_reg);
438 spe_release_register(gen->f, sp_reg);
439 }
440 else {
441 /* save stack pointer # stqd $sp,-frameSize($sp) */
442 spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size);
443
444 /* adjust stack pointer # ai $sp,$sp,-frameSize */
445 spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size);
446 }
447 }
448
449
450 static void
451 emit_epilogue(struct codegen *gen)
452 {
453 const int return_reg = 3;
454
455 spe_comment(gen->f, -4, "Function epilogue:");
456
457 spe_comment(gen->f, 0, "return the killed mask");
458 if (gen->kill_mask_reg > 0) {
459 /* shader called KIL, return the "alive" mask */
460 spe_move(gen->f, return_reg, gen->kill_mask_reg);
461 }
462 else {
463 /* return {0,0,0,0} */
464 spe_load_uint(gen->f, return_reg, 0);
465 }
466
467 spe_comment(gen->f, 0, "restore stack and return");
468 if (gen->frame_size >= 512) {
469 /* offset is too large for ai instruction */
470 int offset_reg = spe_allocate_available_register(gen->f);
471 /* offset = framesize */
472 spe_load_int(gen->f, offset_reg, gen->frame_size);
473 /* $sp = $sp + offset */
474 spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg);
475 /* clean up */
476 spe_release_register(gen->f, offset_reg);
477 }
478 else {
479 /* restore stack pointer # ai $sp,$sp,frameSize */
480 spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, gen->frame_size);
481 }
482
483 /* restore $lr # lqd $lr,16($sp) */
484 spe_lqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16);
485
486 /* return from function call */
487 spe_bi(gen->f, SPE_REG_RA, 0, 0);
488 }
489
490
491 static boolean
492 emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst)
493 {
494 int ch, src_reg[4], dst_reg[4];
495
496 spe_comment(gen->f, -4, "MOV:");
497 for (ch = 0; ch < 4; ch++) {
498 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
499 src_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
500 dst_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
501 }
502 }
503
504 for (ch = 0; ch < 4; ch++) {
505 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
506 if (is_register_src(gen, ch, &inst->FullSrcRegisters[0]) &&
507 is_memory_dst(gen, ch, &inst->FullDstRegisters[0])) {
508 /* special-case: register to memory store */
509 store_dest_reg(gen, src_reg[ch], ch, &inst->FullDstRegisters[0]);
510 }
511 else {
512 spe_move(gen->f, dst_reg[ch], src_reg[ch]);
513 store_dest_reg(gen, dst_reg[ch], ch, &inst->FullDstRegisters[0]);
514 }
515 free_itemps(gen);
516 }
517 }
518 return true;
519 }
520
521 /**
522 * Emit addition instructions. Recall that a single TGSI_OPCODE_ADD
523 * becomes (up to) four SPU "fa" instructions because we're doing SOA
524 * processing.
525 */
526 static boolean
527 emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst)
528 {
529 int ch, s1_reg[4], s2_reg[4], d_reg[4];
530
531 spe_comment(gen->f, -4, "ADD:");
532 /* Loop over Red/Green/Blue/Alpha channels, fetch src operands */
533 for (ch = 0; ch < 4; ch++) {
534 /* If the dest R, G, B or A writemask is enabled... */
535 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
536 s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
537 s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
538 d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
539 }
540 }
541 /* Loop over Red/Green/Blue/Alpha channels, do the add, store results */
542 for (ch = 0; ch < 4; ch++) {
543 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
544 /* Emit actual SPE instruction: d = s1 + s2 */
545 spe_fa(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
546 /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
547 store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
548 /* Free any intermediate temps we allocated */
549 free_itemps(gen);
550 }
551 }
552 return true;
553 }
554
555 /**
556 * Emit subtract. See emit_ADD for comments.
557 */
558 static boolean
559 emit_SUB(struct codegen *gen, const struct tgsi_full_instruction *inst)
560 {
561 int ch, s1_reg[4], s2_reg[4], d_reg[4];
562 spe_comment(gen->f, -4, "SUB:");
563 for (ch = 0; ch < 4; ch++) {
564 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
565 s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
566 s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
567 d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
568 }
569 }
570 for (ch = 0; ch < 4; ch++) {
571 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
572 /* d = s1 - s2 */
573 spe_fs(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
574 store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
575 free_itemps(gen);
576 }
577 }
578 return true;
579 }
580
581 /**
582 * Emit multiply add. See emit_ADD for comments.
583 */
584 static boolean
585 emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst)
586 {
587 int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4];
588 spe_comment(gen->f, -4, "MAD:");
589 for (ch = 0; ch < 4; ch++) {
590 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
591 s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
592 s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
593 s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
594 d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
595 }
596 }
597 for (ch = 0; ch < 4; ch++) {
598 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
599 /* d = s1 * s2 + s3 */
600 spe_fma(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch], s3_reg[ch]);
601 store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
602 free_itemps(gen);
603 }
604 }
605 return true;
606 }
607
608
609 /**
610 * Emit linear interpolate. See emit_ADD for comments.
611 */
612 static boolean
613 emit_LERP(struct codegen *gen, const struct tgsi_full_instruction *inst)
614 {
615 int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4], tmp_reg[4];
616 spe_comment(gen->f, -4, "LERP:");
617 /* setup/get src/dst/temp regs */
618 for (ch = 0; ch < 4; ch++) {
619 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
620 s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
621 s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
622 s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
623 d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
624 tmp_reg[ch] = get_itemp(gen);
625 }
626 }
627
628 /* d = s3 + s1(s2 - s3) */
629 /* do all subtracts, then all fma, then all stores to better pipeline */
630 for (ch = 0; ch < 4; ch++) {
631 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
632 spe_fs(gen->f, tmp_reg[ch], s2_reg[ch], s3_reg[ch]);
633 }
634 }
635 for (ch = 0; ch < 4; ch++) {
636 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
637 spe_fma(gen->f, d_reg[ch], tmp_reg[ch], s1_reg[ch], s3_reg[ch]);
638 }
639 }
640 for (ch = 0; ch < 4; ch++) {
641 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
642 store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
643 }
644 }
645 free_itemps(gen);
646 return true;
647 }
648
649 /**
650 * Emit multiply. See emit_ADD for comments.
651 */
652 static boolean
653 emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst)
654 {
655 int ch, s1_reg[4], s2_reg[4], d_reg[4];
656 spe_comment(gen->f, -4, "MUL:");
657 for (ch = 0; ch < 4; ch++) {
658 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
659 s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
660 s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
661 d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
662 }
663 }
664 for (ch = 0; ch < 4; ch++) {
665 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
666 /* d = s1 * s2 */
667 spe_fm(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
668 store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
669 free_itemps(gen);
670 }
671 }
672 return true;
673 }
674
675 /**
676 * Emit reciprocal. See emit_ADD for comments.
677 */
678 static boolean
679 emit_RCP(struct codegen *gen, const struct tgsi_full_instruction *inst)
680 {
681 int ch;
682 spe_comment(gen->f, -4, "RCP:");
683 for (ch = 0; ch < 4; ch++) {
684 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
685 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
686 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
687 /* d = 1/s1 */
688 spe_frest(gen->f, d_reg, s1_reg);
689 spe_fi(gen->f, d_reg, s1_reg, d_reg);
690 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
691 free_itemps(gen);
692 }
693 }
694 return true;
695 }
696
697 /**
698 * Emit reciprocal sqrt. See emit_ADD for comments.
699 */
700 static boolean
701 emit_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst)
702 {
703 int ch;
704 spe_comment(gen->f, -4, "RSQ:");
705 for (ch = 0; ch < 4; ch++) {
706 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
707 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
708 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
709 /* d = 1/s1 */
710 spe_frsqest(gen->f, d_reg, s1_reg);
711 spe_fi(gen->f, d_reg, s1_reg, d_reg);
712 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
713 free_itemps(gen);
714 }
715 }
716 return true;
717 }
718
719 /**
720 * Emit absolute value. See emit_ADD for comments.
721 */
722 static boolean
723 emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst)
724 {
725 int ch;
726 spe_comment(gen->f, -4, "ABS:");
727 for (ch = 0; ch < 4; ch++) {
728 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
729 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
730 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
731 const int bit31mask_reg = get_itemp(gen);
732
733 /* mask with bit 31 set, the rest cleared */
734 spe_load_uint(gen->f, bit31mask_reg, (1 << 31));
735
736 /* d = sign bit cleared in s1 */
737 spe_andc(gen->f, d_reg, s1_reg, bit31mask_reg);
738
739 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
740 free_itemps(gen);
741 }
742 }
743 return true;
744 }
745
746 /**
747 * Emit 3 component dot product. See emit_ADD for comments.
748 */
749 static boolean
750 emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst)
751 {
752 int ch;
753 int s1x_reg, s1y_reg, s1z_reg;
754 int s2x_reg, s2y_reg, s2z_reg;
755 int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
756
757 spe_comment(gen->f, -4, "DP3:");
758
759 s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
760 s2x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
761 s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
762 s2y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
763 s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
764 s2z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
765
766 /* t0 = x0 * x1 */
767 spe_fm(gen->f, t0_reg, s1x_reg, s2x_reg);
768
769 /* t1 = y0 * y1 */
770 spe_fm(gen->f, t1_reg, s1y_reg, s2y_reg);
771
772 /* t0 = z0 * z1 + t0 */
773 spe_fma(gen->f, t0_reg, s1z_reg, s2z_reg, t0_reg);
774
775 /* t0 = t0 + t1 */
776 spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
777
778 for (ch = 0; ch < 4; ch++) {
779 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
780 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
781 spe_move(gen->f, d_reg, t0_reg);
782 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
783 }
784 }
785
786 free_itemps(gen);
787 return true;
788 }
789
790 /**
791 * Emit 4 component dot product. See emit_ADD for comments.
792 */
793 static boolean
794 emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst)
795 {
796 int ch;
797 int s0x_reg, s0y_reg, s0z_reg, s0w_reg;
798 int s1x_reg, s1y_reg, s1z_reg, s1w_reg;
799 int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
800
801 spe_comment(gen->f, -4, "DP4:");
802
803 s0x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
804 s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
805 s0y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
806 s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
807 s0z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
808 s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
809 s0w_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[0]);
810 s1w_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]);
811
812 /* t0 = x0 * x1 */
813 spe_fm(gen->f, t0_reg, s0x_reg, s1x_reg);
814
815 /* t1 = y0 * y1 */
816 spe_fm(gen->f, t1_reg, s0y_reg, s1y_reg);
817
818 /* t0 = z0 * z1 + t0 */
819 spe_fma(gen->f, t0_reg, s0z_reg, s1z_reg, t0_reg);
820
821 /* t1 = w0 * w1 + t1 */
822 spe_fma(gen->f, t1_reg, s0w_reg, s1w_reg, t1_reg);
823
824 /* t0 = t0 + t1 */
825 spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
826
827 for (ch = 0; ch < 4; ch++) {
828 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
829 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
830 spe_move(gen->f, d_reg, t0_reg);
831 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
832 }
833 }
834
835 free_itemps(gen);
836 return true;
837 }
838
839 /**
840 * Emit homogeneous dot product. See emit_ADD for comments.
841 */
842 static boolean
843 emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst)
844 {
845 /* XXX rewrite this function to look more like DP3/DP4 */
846 int ch;
847 spe_comment(gen->f, -4, "DPH:");
848
849 int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
850 int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
851 int tmp_reg = get_itemp(gen);
852
853 /* t = x0 * x1 */
854 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
855
856 s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
857 s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
858 /* t = y0 * y1 + t */
859 spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
860
861 s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
862 s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
863 /* t = z0 * z1 + t */
864 spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
865
866 s2_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]);
867 /* t = w1 + t */
868 spe_fa(gen->f, tmp_reg, s2_reg, tmp_reg);
869
870 for (ch = 0; ch < 4; ch++) {
871 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
872 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
873 spe_move(gen->f, d_reg, tmp_reg);
874 store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]);
875 }
876 }
877
878 free_itemps(gen);
879 return true;
880 }
881
882 /**
883 * Emit cross product. See emit_ADD for comments.
884 */
885 static boolean
886 emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst)
887 {
888 spe_comment(gen->f, -4, "XPD:");
889
890 int s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
891 int s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
892 int tmp_reg = get_itemp(gen);
893
894 /* t = z0 * y1 */
895 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
896
897 s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
898 s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
899 /* t = y0 * z1 - t */
900 spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
901
902 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_X)) {
903 store_dest_reg(gen, tmp_reg, CHAN_X, &inst->FullDstRegisters[0]);
904 }
905
906 s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
907 s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
908 /* t = x0 * z1 */
909 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
910
911 s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
912 s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
913 /* t = z0 * x1 - t */
914 spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
915
916 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Y)) {
917 store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->FullDstRegisters[0]);
918 }
919
920 s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
921 s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
922 /* t = y0 * x1 */
923 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
924
925 s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
926 s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
927 /* t = x0 * y1 - t */
928 spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
929
930 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Z)) {
931 store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->FullDstRegisters[0]);
932 }
933
934 free_itemps(gen);
935 return true;
936 }
937
938 /**
939 * Emit set-if-greater-than.
940 * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as
941 * the result but OpenGL/TGSI needs 0.0 and 1.0 results.
942 * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND.
943 */
944 static boolean
945 emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst)
946 {
947 int ch;
948
949 spe_comment(gen->f, -4, "SGT:");
950
951 for (ch = 0; ch < 4; ch++) {
952 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
953 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
954 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
955 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
956
957 /* d = (s1 > s2) */
958 spe_fcgt(gen->f, d_reg, s1_reg, s2_reg);
959
960 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
961 /* d = d & one_reg */
962 spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
963
964 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
965 free_itemps(gen);
966 }
967 }
968
969 return true;
970 }
971
972 /**
973 * Emit set-if_less-then. See emit_SGT for comments.
974 */
975 static boolean
976 emit_SLT(struct codegen *gen, const struct tgsi_full_instruction *inst)
977 {
978 int ch;
979
980 spe_comment(gen->f, -4, "SLT:");
981
982 for (ch = 0; ch < 4; ch++) {
983 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
984 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
985 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
986 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
987
988 /* d = (s1 < s2) */
989 spe_fcgt(gen->f, d_reg, s2_reg, s1_reg);
990
991 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
992 /* d = d & one_reg */
993 spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
994
995 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
996 free_itemps(gen);
997 }
998 }
999
1000 return true;
1001 }
1002
1003 /**
1004 * Emit set-if_greater-then-or-equal. See emit_SGT for comments.
1005 */
1006 static boolean
1007 emit_SGE(struct codegen *gen, const struct tgsi_full_instruction *inst)
1008 {
1009 int ch;
1010
1011 spe_comment(gen->f, -4, "SGE:");
1012
1013 for (ch = 0; ch < 4; ch++) {
1014 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1015 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1016 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
1017 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1018
1019 /* d = (s1 >= s2) */
1020 spe_fcgt(gen->f, d_reg, s2_reg, s1_reg);
1021
1022 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
1023 /* d = ~d & one_reg */
1024 spe_andc(gen->f, d_reg, get_const_one_reg(gen), d_reg);
1025
1026 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1027 free_itemps(gen);
1028 }
1029 }
1030
1031 return true;
1032 }
1033
1034 /**
1035 * Emit set-if_less-then-or-equal. See emit_SGT for comments.
1036 */
1037 static boolean
1038 emit_SLE(struct codegen *gen, const struct tgsi_full_instruction *inst)
1039 {
1040 int ch;
1041
1042 spe_comment(gen->f, -4, "SLE:");
1043
1044 for (ch = 0; ch < 4; ch++) {
1045 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1046 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1047 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
1048 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1049
1050 /* d = (s1 <= s2) */
1051 spe_fcgt(gen->f, d_reg, s1_reg, s2_reg);
1052
1053 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
1054 /* d = ~d & one_reg */
1055 spe_andc(gen->f, d_reg, get_const_one_reg(gen), d_reg);
1056
1057 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1058 free_itemps(gen);
1059 }
1060 }
1061
1062 return true;
1063 }
1064
1065 /**
1066 * Emit set-if_equal. See emit_SGT for comments.
1067 */
1068 static boolean
1069 emit_SEQ(struct codegen *gen, const struct tgsi_full_instruction *inst)
1070 {
1071 int ch;
1072
1073 spe_comment(gen->f, -4, "SEQ:");
1074
1075 for (ch = 0; ch < 4; ch++) {
1076 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1077 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1078 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
1079 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1080
1081 /* d = (s1 == s2) */
1082 spe_fceq(gen->f, d_reg, s1_reg, s2_reg);
1083
1084 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
1085 /* d = d & one_reg */
1086 spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
1087
1088 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1089 free_itemps(gen);
1090 }
1091 }
1092
1093 return true;
1094 }
1095
1096 /**
1097 * Emit set-if_not_equal. See emit_SGT for comments.
1098 */
1099 static boolean
1100 emit_SNE(struct codegen *gen, const struct tgsi_full_instruction *inst)
1101 {
1102 int ch;
1103
1104 spe_comment(gen->f, -4, "SNE:");
1105
1106 for (ch = 0; ch < 4; ch++) {
1107 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1108 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1109 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
1110 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1111
1112 /* d = (s1 != s2) */
1113 spe_fceq(gen->f, d_reg, s1_reg, s2_reg);
1114 spe_nor(gen->f, d_reg, d_reg, d_reg);
1115
1116 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
1117 /* d = d & one_reg */
1118 spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
1119
1120 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1121 free_itemps(gen);
1122 }
1123 }
1124
1125 return true;
1126 }
1127
1128 /**
1129 * Emit compare. See emit_SGT for comments.
1130 */
1131 static boolean
1132 emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst)
1133 {
1134 int ch;
1135
1136 spe_comment(gen->f, -4, "CMP:");
1137
1138 for (ch = 0; ch < 4; ch++) {
1139 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1140 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1141 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
1142 int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
1143 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1144 int zero_reg = get_itemp(gen);
1145
1146 spe_xor(gen->f, zero_reg, zero_reg, zero_reg);
1147
1148 /* d = (s1 < 0) ? s2 : s3 */
1149 spe_fcgt(gen->f, d_reg, zero_reg, s1_reg);
1150 spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg);
1151
1152 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1153 free_itemps(gen);
1154 }
1155 }
1156
1157 return true;
1158 }
1159
1160 /**
1161 * Emit trunc.
1162 * Convert float to signed int
1163 * Convert signed int to float
1164 */
1165 static boolean
1166 emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst)
1167 {
1168 int ch;
1169
1170 spe_comment(gen->f, -4, "TRUNC:");
1171
1172 for (ch = 0; ch < 4; ch++) {
1173 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1174 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1175 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1176
1177 /* Convert float to int */
1178 spe_cflts(gen->f, d_reg, s1_reg, 0);
1179
1180 /* Convert int to float */
1181 spe_csflt(gen->f, d_reg, d_reg, 0);
1182
1183 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1184 free_itemps(gen);
1185 }
1186 }
1187
1188 return true;
1189 }
1190
1191 /**
1192 * Emit floor.
1193 * If negative int subtract one
1194 * Convert float to signed int
1195 * Convert signed int to float
1196 */
1197 static boolean
1198 emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst)
1199 {
1200 int ch;
1201
1202 spe_comment(gen->f, -4, "FLR:");
1203
1204 int zero_reg = get_itemp(gen);
1205 spe_xor(gen->f, zero_reg, zero_reg, zero_reg);
1206
1207 for (ch = 0; ch < 4; ch++) {
1208 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1209 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1210 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1211 int tmp_reg = get_itemp(gen);
1212
1213 /* If negative, subtract 1.0 */
1214 spe_fcgt(gen->f, tmp_reg, zero_reg, s1_reg);
1215 spe_selb(gen->f, tmp_reg, zero_reg, get_const_one_reg(gen), tmp_reg);
1216 spe_fs(gen->f, tmp_reg, s1_reg, tmp_reg);
1217
1218 /* Convert float to int */
1219 spe_cflts(gen->f, tmp_reg, tmp_reg, 0);
1220
1221 /* Convert int to float */
1222 spe_csflt(gen->f, d_reg, tmp_reg, 0);
1223
1224 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1225 free_itemps(gen);
1226 }
1227 }
1228
1229 return true;
1230 }
1231
1232 /**
1233 * Compute frac = Input - FLR(Input)
1234 */
1235 static boolean
1236 emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst)
1237 {
1238 int ch;
1239
1240 spe_comment(gen->f, -4, "FRC:");
1241
1242 int zero_reg = get_itemp(gen);
1243 spe_xor(gen->f, zero_reg, zero_reg, zero_reg);
1244
1245 for (ch = 0; ch < 4; ch++) {
1246 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1247 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1248 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1249 int tmp_reg = get_itemp(gen);
1250
1251 /* If negative, subtract 1.0 */
1252 spe_fcgt(gen->f, tmp_reg, zero_reg, s1_reg);
1253 spe_selb(gen->f, tmp_reg, zero_reg, get_const_one_reg(gen), tmp_reg);
1254 spe_fs(gen->f, tmp_reg, s1_reg, tmp_reg);
1255
1256 /* Convert float to int */
1257 spe_cflts(gen->f, tmp_reg, tmp_reg, 0);
1258
1259 /* Convert int to float */
1260 spe_csflt(gen->f, tmp_reg, tmp_reg, 0);
1261
1262 /* d = s1 - FLR(s1) */
1263 spe_fs(gen->f, d_reg, s1_reg, tmp_reg);
1264
1265 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1266 free_itemps(gen);
1267 }
1268 }
1269
1270 return true;
1271 }
1272
1273
#if 0
/**
 * Debug helper (currently compiled out): print the index, name and
 * address of every entry in the context's SPU function table.
 */
static void
print_functions(struct cell_context *cell)
{
   struct cell_spu_function_info *table = &cell->spu_functions;
   uint entry = 0;

   while (entry < table->num) {
      printf("SPU func %u: %s at %u\n",
             entry, table->names[entry], table->addrs[entry]);
      entry++;
   }
}
#endif
1286
1287
1288 static uint
1289 lookup_function(struct cell_context *cell, const char *funcname)
1290 {
1291 const struct cell_spu_function_info *funcs = &cell->spu_functions;
1292 uint i, addr = 0;
1293 for (i = 0; i < funcs->num; i++) {
1294 if (strcmp(funcs->names[i], funcname) == 0) {
1295 addr = funcs->addrs[i];
1296 }
1297 }
1298 assert(addr && "spu function not found");
1299 return addr / 4; /* discard 2 least significant bits */
1300 }
1301
1302
1303 /**
1304 * Emit code to call a SPU function.
1305 * Used to implement instructions like SIN/COS/POW/TEX/etc.
1306 * If scalar, only the X components of the src regs are used, and the
1307 * result is replicated across the dest register's XYZW components.
1308 */
1309 static boolean
1310 emit_function_call(struct codegen *gen,
1311 const struct tgsi_full_instruction *inst,
1312 char *funcname, uint num_args, boolean scalar)
1313 {
1314 const uint addr = lookup_function(gen->cell, funcname);
1315 char comment[100];
1316 int s_regs[3];
1317 int func_called = FALSE;
1318 uint a, ch;
1319 int retval_reg = -1;
1320
1321 assert(num_args <= 3);
1322
1323 snprintf(comment, sizeof(comment), "CALL %s:", funcname);
1324 spe_comment(gen->f, -4, comment);
1325
1326 if (scalar) {
1327 for (a = 0; a < num_args; a++) {
1328 s_regs[a] = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[a]);
1329 }
1330 /* we'll call the function, put the return value in this register,
1331 * then replicate it across all write-enabled components in d_reg.
1332 */
1333 retval_reg = spe_allocate_available_register(gen->f);
1334 }
1335
1336 for (ch = 0; ch < 4; ch++) {
1337 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1338 int d_reg;
1339 ubyte usedRegs[SPE_NUM_REGS];
1340 uint i, numUsed;
1341
1342 if (!scalar) {
1343 for (a = 0; a < num_args; a++) {
1344 s_regs[a] = get_src_reg(gen, ch, &inst->FullSrcRegisters[a]);
1345 }
1346 }
1347
1348 d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1349
1350 if (!scalar || !func_called) {
1351 /* for a scalar function, we'll really only call the function once */
1352
1353 numUsed = spe_get_registers_used(gen->f, usedRegs);
1354 assert(numUsed < gen->frame_size / 16 - 2);
1355
1356 /* save registers to stack */
1357 for (i = 0; i < numUsed; i++) {
1358 uint reg = usedRegs[i];
1359 int offset = 2 + i;
1360 spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset);
1361 }
1362
1363 /* setup function arguments */
1364 for (a = 0; a < num_args; a++) {
1365 spe_move(gen->f, 3 + a, s_regs[a]);
1366 }
1367
1368 /* branch to function, save return addr */
1369 spe_brasl(gen->f, SPE_REG_RA, addr);
1370
1371 /* save function's return value */
1372 if (scalar)
1373 spe_move(gen->f, retval_reg, 3);
1374 else
1375 spe_move(gen->f, d_reg, 3);
1376
1377 /* restore registers from stack */
1378 for (i = 0; i < numUsed; i++) {
1379 uint reg = usedRegs[i];
1380 if (reg != d_reg && reg != retval_reg) {
1381 int offset = 2 + i;
1382 spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset);
1383 }
1384 }
1385
1386 func_called = TRUE;
1387 }
1388
1389 if (scalar) {
1390 spe_move(gen->f, d_reg, retval_reg);
1391 }
1392
1393 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1394 free_itemps(gen);
1395 }
1396 }
1397
1398 if (scalar) {
1399 spe_release_register(gen->f, retval_reg);
1400 }
1401
1402 return true;
1403 }
1404
1405
1406 static boolean
1407 emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst)
1408 {
1409 const uint target = inst->InstructionExtTexture.Texture;
1410 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1411 uint addr;
1412 int ch;
1413 int coord_regs[4], d_regs[4];
1414
1415 switch (target) {
1416 case TGSI_TEXTURE_1D:
1417 case TGSI_TEXTURE_2D:
1418 addr = lookup_function(gen->cell, "spu_tex_2d");
1419 break;
1420 case TGSI_TEXTURE_3D:
1421 addr = lookup_function(gen->cell, "spu_tex_3d");
1422 break;
1423 case TGSI_TEXTURE_CUBE:
1424 addr = lookup_function(gen->cell, "spu_tex_cube");
1425 break;
1426 default:
1427 ASSERT(0 && "unsupported texture target");
1428 return FALSE;
1429 }
1430
1431 assert(inst->FullSrcRegisters[1].SrcRegister.File == TGSI_FILE_SAMPLER);
1432
1433 spe_comment(gen->f, -4, "CALL tex:");
1434
1435 /* get src/dst reg info */
1436 for (ch = 0; ch < 4; ch++) {
1437 coord_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1438 d_regs[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1439 }
1440
1441 {
1442 ubyte usedRegs[SPE_NUM_REGS];
1443 uint i, numUsed;
1444
1445 numUsed = spe_get_registers_used(gen->f, usedRegs);
1446 assert(numUsed < gen->frame_size / 16 - 2);
1447
1448 /* save registers to stack */
1449 for (i = 0; i < numUsed; i++) {
1450 uint reg = usedRegs[i];
1451 int offset = 2 + i;
1452 spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset);
1453 }
1454
1455 /* setup function arguments (XXX depends on target) */
1456 for (i = 0; i < 4; i++) {
1457 spe_move(gen->f, 3 + i, coord_regs[i]);
1458 }
1459 spe_load_uint(gen->f, 7, unit); /* sampler unit */
1460
1461 /* branch to function, save return addr */
1462 spe_brasl(gen->f, SPE_REG_RA, addr);
1463
1464 /* save function's return values (four pixel's colors) */
1465 for (i = 0; i < 4; i++) {
1466 spe_move(gen->f, d_regs[i], 3 + i);
1467 }
1468
1469 /* restore registers from stack */
1470 for (i = 0; i < numUsed; i++) {
1471 uint reg = usedRegs[i];
1472 if (reg != d_regs[0] &&
1473 reg != d_regs[1] &&
1474 reg != d_regs[2] &&
1475 reg != d_regs[3]) {
1476 int offset = 2 + i;
1477 spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset);
1478 }
1479 }
1480 }
1481
1482 for (ch = 0; ch < 4; ch++) {
1483 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1484 store_dest_reg(gen, d_regs[ch], ch, &inst->FullDstRegisters[0]);
1485 free_itemps(gen);
1486 }
1487 }
1488
1489 return TRUE;
1490 }
1491
1492
1493 /**
1494 * KILL if any of src reg values are less than zero.
1495 */
1496 static boolean
1497 emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst)
1498 {
1499 int ch;
1500 int s_regs[4], kil_reg = -1, cmp_reg, zero_reg;
1501
1502 spe_comment(gen->f, -4, "CALL kil:");
1503
1504 /* zero = {0,0,0,0} */
1505 zero_reg = get_itemp(gen);
1506 spe_load_uint(gen->f, zero_reg, 0);
1507
1508 cmp_reg = get_itemp(gen);
1509
1510 /* get src regs */
1511 for (ch = 0; ch < 4; ch++) {
1512 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1513 s_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1514 }
1515 }
1516
1517 /* test if any src regs are < 0 */
1518 for (ch = 0; ch < 4; ch++) {
1519 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1520 if (kil_reg >= 0) {
1521 /* cmp = 0 > src ? : ~0 : 0 */
1522 spe_fcgt(gen->f, cmp_reg, zero_reg, s_regs[ch]);
1523 /* kil = kil | cmp */
1524 spe_or(gen->f, kil_reg, kil_reg, cmp_reg);
1525 }
1526 else {
1527 kil_reg = get_itemp(gen);
1528 /* kil = 0 > src ? : ~0 : 0 */
1529 spe_fcgt(gen->f, kil_reg, zero_reg, s_regs[ch]);
1530 }
1531 }
1532 }
1533
1534 if (gen->if_nesting) {
1535 /* may have been a conditional kil */
1536 spe_and(gen->f, kil_reg, kil_reg, gen->exec_mask_reg);
1537 }
1538
1539 /* allocate the kill mask reg if needed */
1540 if (gen->kill_mask_reg <= 0) {
1541 gen->kill_mask_reg = spe_allocate_available_register(gen->f);
1542 spe_move(gen->f, gen->kill_mask_reg, kil_reg);
1543 }
1544 else {
1545 spe_or(gen->f, gen->kill_mask_reg, gen->kill_mask_reg, kil_reg);
1546 }
1547
1548 free_itemps(gen);
1549
1550 return TRUE;
1551 }
1552
1553
1554
1555 /**
1556 * Emit max. See emit_SGT for comments.
1557 */
1558 static boolean
1559 emit_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst)
1560 {
1561 int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4];
1562
1563 spe_comment(gen->f, -4, "MAX:");
1564
1565 for (ch = 0; ch < 4; ch++) {
1566 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1567 s0_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1568 s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
1569 d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1570 tmp_reg[ch] = get_itemp(gen);
1571 }
1572 }
1573
1574 /* d = (s0 > s1) ? s0 : s1 */
1575 for (ch = 0; ch < 4; ch++) {
1576 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1577 spe_fcgt(gen->f, tmp_reg[ch], s0_reg[ch], s1_reg[ch]);
1578 }
1579 }
1580 for (ch = 0; ch < 4; ch++) {
1581 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1582 spe_selb(gen->f, d_reg[ch], s1_reg[ch], s0_reg[ch], tmp_reg[ch]);
1583 }
1584 }
1585
1586 for (ch = 0; ch < 4; ch++) {
1587 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1588 store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
1589 }
1590 }
1591
1592 free_itemps(gen);
1593 return true;
1594 }
1595
1596 /**
1597 * Emit max. See emit_SGT for comments.
1598 */
1599 static boolean
1600 emit_MIN(struct codegen *gen, const struct tgsi_full_instruction *inst)
1601 {
1602 int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4];
1603
1604 spe_comment(gen->f, -4, "MIN:");
1605
1606 for (ch = 0; ch < 4; ch++) {
1607 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1608 s0_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1609 s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
1610 d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1611 tmp_reg[ch] = get_itemp(gen);
1612 }
1613 }
1614
1615 /* d = (s1 > s0) ? s0 : s1 */
1616 for (ch = 0; ch < 4; ch++) {
1617 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1618 spe_fcgt(gen->f, tmp_reg[ch], s1_reg[ch], s0_reg[ch]);
1619 }
1620 }
1621 for (ch = 0; ch < 4; ch++) {
1622 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1623 spe_selb(gen->f, d_reg[ch], s1_reg[ch], s0_reg[ch], tmp_reg[ch]);
1624 }
1625 }
1626
1627 for (ch = 0; ch < 4; ch++) {
1628 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1629 store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
1630 }
1631 }
1632
1633 free_itemps(gen);
1634 return true;
1635 }
1636
1637 static boolean
1638 emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst)
1639 {
1640 const int channel = 0;
1641 const int exec_reg = get_exec_mask_reg(gen);
1642
1643 spe_comment(gen->f, -4, "IF:");
1644
1645 /* update execution mask with the predicate register */
1646 int tmp_reg = get_itemp(gen);
1647 int s1_reg = get_src_reg(gen, channel, &inst->FullSrcRegisters[0]);
1648
1649 /* tmp = (s1_reg == 0) */
1650 spe_ceqi(gen->f, tmp_reg, s1_reg, 0);
1651 /* tmp = !tmp */
1652 spe_complement(gen->f, tmp_reg, tmp_reg);
1653 /* exec_mask = exec_mask & tmp */
1654 spe_and(gen->f, exec_reg, exec_reg, tmp_reg);
1655
1656 gen->if_nesting++;
1657
1658 free_itemps(gen);
1659
1660 return true;
1661 }
1662
1663
1664 static boolean
1665 emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst)
1666 {
1667 const int exec_reg = get_exec_mask_reg(gen);
1668
1669 spe_comment(gen->f, -4, "ELSE:");
1670
1671 /* exec_mask = !exec_mask */
1672 spe_complement(gen->f, exec_reg, exec_reg);
1673
1674 return true;
1675 }
1676
1677
1678 static boolean
1679 emit_ENDIF(struct codegen *gen, const struct tgsi_full_instruction *inst)
1680 {
1681 const int exec_reg = get_exec_mask_reg(gen);
1682
1683 spe_comment(gen->f, -4, "ENDIF:");
1684
1685 /* XXX todo: pop execution mask */
1686
1687 spe_load_int(gen->f, exec_reg, ~0x0);
1688
1689 gen->if_nesting--;
1690 return true;
1691 }
1692
1693
1694 static boolean
1695 emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst,
1696 boolean ddx)
1697 {
1698 int ch;
1699
1700 spe_comment(gen->f, -4, ddx ? "DDX:" : "DDY:");
1701
1702 for (ch = 0; ch < 4; ch++) {
1703 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1704 int s_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1705 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1706
1707 int t1_reg = get_itemp(gen);
1708 int t2_reg = get_itemp(gen);
1709
1710 spe_splat_word(gen->f, t1_reg, s_reg, 0); /* upper-left pixel */
1711 if (ddx) {
1712 spe_splat_word(gen->f, t2_reg, s_reg, 1); /* upper-right pixel */
1713 }
1714 else {
1715 spe_splat_word(gen->f, t2_reg, s_reg, 2); /* lower-left pixel */
1716 }
1717 spe_fs(gen->f, d_reg, t2_reg, t1_reg);
1718
1719 free_itemps(gen);
1720 }
1721 }
1722
1723 return true;
1724 }
1725
1726
1727
1728
1729 /**
1730 * Emit END instruction.
1731 * We just return from the shader function at this point.
1732 *
1733 * Note that there may be more code after this that would be
1734 * called by TGSI_OPCODE_CALL.
1735 */
1736 static boolean
1737 emit_END(struct codegen *gen)
1738 {
1739 spe_comment(gen->f, -4, "END:");
1740 emit_epilogue(gen);
1741 return true;
1742 }
1743
1744
1745 /**
1746 * Emit code for the given instruction. Just a big switch stmt.
1747 */
1748 static boolean
1749 emit_instruction(struct codegen *gen,
1750 const struct tgsi_full_instruction *inst)
1751 {
1752 switch (inst->Instruction.Opcode) {
1753 case TGSI_OPCODE_MOV:
1754 case TGSI_OPCODE_SWZ:
1755 return emit_MOV(gen, inst);
1756 case TGSI_OPCODE_MUL:
1757 return emit_MUL(gen, inst);
1758 case TGSI_OPCODE_ADD:
1759 return emit_ADD(gen, inst);
1760 case TGSI_OPCODE_SUB:
1761 return emit_SUB(gen, inst);
1762 case TGSI_OPCODE_MAD:
1763 return emit_MAD(gen, inst);
1764 case TGSI_OPCODE_LERP:
1765 return emit_LERP(gen, inst);
1766 case TGSI_OPCODE_DP3:
1767 return emit_DP3(gen, inst);
1768 case TGSI_OPCODE_DP4:
1769 return emit_DP4(gen, inst);
1770 case TGSI_OPCODE_DPH:
1771 return emit_DPH(gen, inst);
1772 case TGSI_OPCODE_XPD:
1773 return emit_XPD(gen, inst);
1774 case TGSI_OPCODE_RCP:
1775 return emit_RCP(gen, inst);
1776 case TGSI_OPCODE_RSQ:
1777 return emit_RSQ(gen, inst);
1778 case TGSI_OPCODE_ABS:
1779 return emit_ABS(gen, inst);
1780 case TGSI_OPCODE_SGT:
1781 return emit_SGT(gen, inst);
1782 case TGSI_OPCODE_SLT:
1783 return emit_SLT(gen, inst);
1784 case TGSI_OPCODE_SGE:
1785 return emit_SGE(gen, inst);
1786 case TGSI_OPCODE_SLE:
1787 return emit_SLE(gen, inst);
1788 case TGSI_OPCODE_SEQ:
1789 return emit_SEQ(gen, inst);
1790 case TGSI_OPCODE_SNE:
1791 return emit_SNE(gen, inst);
1792 case TGSI_OPCODE_CMP:
1793 return emit_CMP(gen, inst);
1794 case TGSI_OPCODE_MAX:
1795 return emit_MAX(gen, inst);
1796 case TGSI_OPCODE_MIN:
1797 return emit_MIN(gen, inst);
1798 case TGSI_OPCODE_TRUNC:
1799 return emit_TRUNC(gen, inst);
1800 case TGSI_OPCODE_FLR:
1801 return emit_FLR(gen, inst);
1802 case TGSI_OPCODE_FRC:
1803 return emit_FRC(gen, inst);
1804 case TGSI_OPCODE_END:
1805 return emit_END(gen);
1806
1807 case TGSI_OPCODE_COS:
1808 return emit_function_call(gen, inst, "spu_cos", 1, TRUE);
1809 case TGSI_OPCODE_SIN:
1810 return emit_function_call(gen, inst, "spu_sin", 1, TRUE);
1811 case TGSI_OPCODE_POW:
1812 return emit_function_call(gen, inst, "spu_pow", 2, TRUE);
1813 case TGSI_OPCODE_EXPBASE2:
1814 return emit_function_call(gen, inst, "spu_exp2", 1, TRUE);
1815 case TGSI_OPCODE_LOGBASE2:
1816 return emit_function_call(gen, inst, "spu_log2", 1, TRUE);
1817 case TGSI_OPCODE_TEX:
1818 /* fall-through for now */
1819 case TGSI_OPCODE_TXD:
1820 /* fall-through for now */
1821 case TGSI_OPCODE_TXB:
1822 /* fall-through for now */
1823 case TGSI_OPCODE_TXL:
1824 /* fall-through for now */
1825 case TGSI_OPCODE_TXP:
1826 return emit_TEX(gen, inst);
1827 case TGSI_OPCODE_KIL:
1828 return emit_KIL(gen, inst);
1829
1830 case TGSI_OPCODE_IF:
1831 return emit_IF(gen, inst);
1832 case TGSI_OPCODE_ELSE:
1833 return emit_ELSE(gen, inst);
1834 case TGSI_OPCODE_ENDIF:
1835 return emit_ENDIF(gen, inst);
1836
1837 case TGSI_OPCODE_DDX:
1838 return emit_DDX_DDY(gen, inst, true);
1839 case TGSI_OPCODE_DDY:
1840 return emit_DDX_DDY(gen, inst, false);
1841
1842 /* XXX lots more cases to do... */
1843
1844 default:
1845 fprintf(stderr, "Cell: unimplemented TGSI instruction %d!\n",
1846 inst->Instruction.Opcode);
1847 return false;
1848 }
1849
1850 return true;
1851 }
1852
1853
1854
1855 /**
1856 * Emit code for a TGSI immediate value (vector of four floats).
1857 * This involves register allocation and initialization.
1858 * XXX the initialization should be done by a "prepare" stage, not
1859 * per quad execution!
1860 */
1861 static boolean
1862 emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed)
1863 {
1864 int ch;
1865
1866 assert(gen->num_imm < MAX_TEMPS);
1867
1868 spe_comment(gen->f, -4, "IMMEDIATE:");
1869
1870 for (ch = 0; ch < 4; ch++) {
1871 float val = immed->u.ImmediateFloat32[ch].Float;
1872
1873 if (ch > 0 && val == immed->u.ImmediateFloat32[ch - 1].Float) {
1874 /* re-use previous register */
1875 gen->imm_regs[gen->num_imm][ch] = gen->imm_regs[gen->num_imm][ch - 1];
1876 }
1877 else {
1878 int reg = spe_allocate_available_register(gen->f);
1879
1880 if (reg < 0)
1881 return false;
1882
1883 /* update immediate map */
1884 gen->imm_regs[gen->num_imm][ch] = reg;
1885
1886 /* emit initializer instruction */
1887 spe_load_float(gen->f, reg, val);
1888 }
1889 }
1890
1891 gen->num_imm++;
1892
1893 return true;
1894 }
1895
1896
1897
1898 /**
1899 * Emit "code" for a TGSI declaration.
1900 * We only care about TGSI TEMPORARY register declarations at this time.
1901 * For each TGSI TEMPORARY we allocate four SPE registers.
1902 */
1903 static boolean
1904 emit_declaration(struct cell_context *cell,
1905 struct codegen *gen, const struct tgsi_full_declaration *decl)
1906 {
1907 int i, ch;
1908
1909 switch (decl->Declaration.File) {
1910 case TGSI_FILE_TEMPORARY:
1911 for (i = decl->DeclarationRange.First;
1912 i <= decl->DeclarationRange.Last;
1913 i++) {
1914 assert(i < MAX_TEMPS);
1915 for (ch = 0; ch < 4; ch++) {
1916 gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f);
1917 if (gen->temp_regs[i][ch] < 0)
1918 return false; /* out of regs */
1919 }
1920
1921 /* XXX if we run out of SPE registers, we need to spill
1922 * to SPU memory. someday...
1923 */
1924
1925 {
1926 char buf[100];
1927 sprintf(buf, "TGSI temp[%d] maps to SPU regs [$%d $%d $%d $%d]", i,
1928 gen->temp_regs[i][0], gen->temp_regs[i][1],
1929 gen->temp_regs[i][2], gen->temp_regs[i][3]);
1930 spe_comment(gen->f, -4, buf);
1931 }
1932 }
1933 break;
1934 default:
1935 ; /* ignore */
1936 }
1937
1938 return true;
1939 }
1940
1941
1942
1943 /**
1944 * Translate TGSI shader code to SPE instructions. This is done when
1945 * the state tracker gives us a new shader (via pipe->create_fs_state()).
1946 *
1947 * \param cell the rendering context (in)
1948 * \param tokens the TGSI shader (in)
1949 * \param f the generated function (out)
1950 */
1951 boolean
1952 cell_gen_fragment_program(struct cell_context *cell,
1953 const struct tgsi_token *tokens,
1954 struct spe_function *f)
1955 {
1956 struct tgsi_parse_context parse;
1957 struct codegen gen;
1958
1959 memset(&gen, 0, sizeof(gen));
1960 gen.cell = cell;
1961 gen.f = f;
1962
1963 /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
1964 gen.inputs_reg = 3; /* pointer to inputs array */
1965 gen.outputs_reg = 4; /* pointer to outputs array */
1966 gen.constants_reg = 5; /* pointer to constants array */
1967
1968 spe_init_func(f, SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE);
1969 spe_allocate_register(f, gen.inputs_reg);
1970 spe_allocate_register(f, gen.outputs_reg);
1971 spe_allocate_register(f, gen.constants_reg);
1972
1973 if (cell->debug_flags & CELL_DEBUG_ASM) {
1974 spe_print_code(f, true);
1975 spe_indent(f, 8);
1976 printf("Begin %s\n", __FUNCTION__);
1977 tgsi_dump(tokens, 0);
1978 }
1979
1980 tgsi_parse_init(&parse, tokens);
1981
1982 emit_prologue(&gen);
1983
1984 while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) {
1985 tgsi_parse_token(&parse);
1986
1987 switch (parse.FullToken.Token.Type) {
1988 case TGSI_TOKEN_TYPE_IMMEDIATE:
1989 if (!emit_immediate(&gen, &parse.FullToken.FullImmediate))
1990 gen.error = true;
1991 break;
1992
1993 case TGSI_TOKEN_TYPE_DECLARATION:
1994 if (!emit_declaration(cell, &gen, &parse.FullToken.FullDeclaration))
1995 gen.error = true;
1996 break;
1997
1998 case TGSI_TOKEN_TYPE_INSTRUCTION:
1999 if (!emit_instruction(&gen, &parse.FullToken.FullInstruction))
2000 gen.error = true;
2001 break;
2002
2003 default:
2004 assert(0);
2005 }
2006 }
2007
2008 if (gen.error) {
2009 /* terminate the SPE code */
2010 return emit_END(&gen);
2011 }
2012
2013 if (cell->debug_flags & CELL_DEBUG_ASM) {
2014 printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst);
2015 printf("End %s\n", __FUNCTION__);
2016 }
2017
2018 tgsi_parse_free( &parse );
2019
2020 return !gen.error;
2021 }