Merge commit 'origin/master' into gallium-0.2
[mesa.git] / src / gallium / drivers / cell / ppu / cell_gen_fp.c
1 /**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29
30 /**
31 * Generate SPU fragment program/shader code.
32 *
33 * Note that we generate SOA-style code here. So each TGSI instruction
34 * operates on four pixels (and is translated into four SPU instructions,
35 * generally speaking).
36 *
37 * \author Brian Paul
38 */
39
40 #include <math.h>
41 #include "pipe/p_defines.h"
42 #include "pipe/p_state.h"
43 #include "pipe/p_shader_tokens.h"
44 #include "tgsi/tgsi_parse.h"
45 #include "tgsi/tgsi_util.h"
46 #include "tgsi/tgsi_exec.h"
47 #include "tgsi/tgsi_dump.h"
48 #include "rtasm/rtasm_ppc_spe.h"
49 #include "util/u_memory.h"
50 #include "cell_context.h"
51 #include "cell_gen_fp.h"
52
53
/* Sizes of the TGSI-to-SPE register maps kept in struct codegen. */
#define MAX_TEMPS 16
#define MAX_IMMED 8

/* Indices of the four vector channels. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
61
62 /**
63 * Context needed during code generation.
64 */
65 struct codegen
66 {
67 struct cell_context *cell;
68 int inputs_reg; /**< 1st function parameter */
69 int outputs_reg; /**< 2nd function parameter */
70 int constants_reg; /**< 3rd function parameter */
71 int temp_regs[MAX_TEMPS][4]; /**< maps TGSI temps to SPE registers */
72 int imm_regs[MAX_IMMED][4]; /**< maps TGSI immediates to SPE registers */
73
74 int num_imm; /**< number of immediates */
75
76 int one_reg; /**< register containing {1.0, 1.0, 1.0, 1.0} */
77
78 /** Per-instruction temps / intermediate temps */
79 int num_itemps;
80 int itemps[12];
81
82 /** Current IF/ELSE/ENDIF nesting level */
83 int if_nesting;
84 /** Index of execution mask register */
85 int exec_mask_reg;
86
87 /** KIL mask: indicates which fragments have been killed */
88 int kill_mask_reg;
89
90 int frame_size; /**< Stack frame size, in words */
91
92 struct spe_function *f;
93 boolean error;
94 };
95
96
97 /**
98 * Allocate an intermediate temporary register.
99 */
100 static int
101 get_itemp(struct codegen *gen)
102 {
103 int t = spe_allocate_available_register(gen->f);
104 assert(gen->num_itemps < Elements(gen->itemps));
105 gen->itemps[gen->num_itemps++] = t;
106 return t;
107 }
108
109 /**
110 * Free all intermediate temporary registers. To be called after each
111 * instruction has been emitted.
112 */
113 static void
114 free_itemps(struct codegen *gen)
115 {
116 int i;
117 for (i = 0; i < gen->num_itemps; i++) {
118 spe_release_register(gen->f, gen->itemps[i]);
119 }
120 gen->num_itemps = 0;
121 }
122
123
124 /**
125 * Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}.
126 * The register is allocated and initialized upon the first call.
127 */
128 static int
129 get_const_one_reg(struct codegen *gen)
130 {
131 if (gen->one_reg <= 0) {
132 gen->one_reg = spe_allocate_available_register(gen->f);
133
134 spe_indent(gen->f, 4);
135 spe_comment(gen->f, -4, "INIT CONSTANT 1.0:");
136
137 /* one = {1.0, 1.0, 1.0, 1.0} */
138 spe_load_float(gen->f, gen->one_reg, 1.0f);
139
140 spe_indent(gen->f, -4);
141 }
142
143 return gen->one_reg;
144 }
145
146
147 /**
148 * Return index of the pixel execution mask.
149 * The register is allocated an initialized upon the first call.
150 *
151 * The pixel execution mask controls which pixels in a quad are
152 * modified, according to surrounding conditionals, loops, etc.
153 */
154 static int
155 get_exec_mask_reg(struct codegen *gen)
156 {
157 if (gen->exec_mask_reg <= 0) {
158 gen->exec_mask_reg = spe_allocate_available_register(gen->f);
159
160 spe_indent(gen->f, 4);
161 spe_comment(gen->f, -4, "INIT EXEC MASK = ~0:");
162
163 /* exec_mask = {~0, ~0, ~0, ~0} */
164 spe_load_int(gen->f, gen->exec_mask_reg, ~0);
165
166 spe_indent(gen->f, -4);
167 }
168
169 return gen->exec_mask_reg;
170 }
171
172
173 static boolean
174 is_register_src(struct codegen *gen, int channel,
175 const struct tgsi_full_src_register *src)
176 {
177 int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel);
178 int sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel);
179
180 if (swizzle > TGSI_SWIZZLE_W || sign_op != TGSI_UTIL_SIGN_KEEP) {
181 return FALSE;
182 }
183 if (src->SrcRegister.File == TGSI_FILE_TEMPORARY ||
184 src->SrcRegister.File == TGSI_FILE_IMMEDIATE) {
185 return TRUE;
186 }
187 return FALSE;
188 }
189
190
191 static boolean
192 is_memory_dst(struct codegen *gen, int channel,
193 const struct tgsi_full_dst_register *dst)
194 {
195 if (dst->DstRegister.File == TGSI_FILE_OUTPUT) {
196 return TRUE;
197 }
198 else {
199 return FALSE;
200 }
201 }
202
203
204 /**
205 * Return the index of the SPU temporary containing the named TGSI
206 * source register. If the TGSI register is a TGSI_FILE_TEMPORARY we
207 * just return the corresponding SPE register. If the TGIS register
208 * is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register
209 * and emit an SPE load instruction.
210 */
211 static int
212 get_src_reg(struct codegen *gen,
213 int channel,
214 const struct tgsi_full_src_register *src)
215 {
216 int reg = -1;
217 int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel);
218 boolean reg_is_itemp = FALSE;
219 uint sign_op;
220
221 assert(swizzle >= TGSI_SWIZZLE_X);
222 assert(swizzle <= TGSI_EXTSWIZZLE_ONE);
223
224 if (swizzle == TGSI_EXTSWIZZLE_ONE) {
225 /* Load const one float and early out */
226 reg = get_const_one_reg(gen);
227 }
228 else if (swizzle == TGSI_EXTSWIZZLE_ZERO) {
229 /* Load const zero float and early out */
230 reg = get_itemp(gen);
231 spe_xor(gen->f, reg, reg, reg);
232 }
233 else {
234 assert(swizzle < 4);
235
236 switch (src->SrcRegister.File) {
237 case TGSI_FILE_TEMPORARY:
238 reg = gen->temp_regs[src->SrcRegister.Index][swizzle];
239 break;
240 case TGSI_FILE_INPUT:
241 {
242 /* offset is measured in quadwords, not bytes */
243 int offset = src->SrcRegister.Index * 4 + swizzle;
244 reg = get_itemp(gen);
245 reg_is_itemp = TRUE;
246 /* Load: reg = memory[(machine_reg) + offset] */
247 spe_lqd(gen->f, reg, gen->inputs_reg, offset * 16);
248 }
249 break;
250 case TGSI_FILE_IMMEDIATE:
251 reg = gen->imm_regs[src->SrcRegister.Index][swizzle];
252 break;
253 case TGSI_FILE_CONSTANT:
254 {
255 /* offset is measured in quadwords, not bytes */
256 int offset = src->SrcRegister.Index * 4 + swizzle;
257 reg = get_itemp(gen);
258 reg_is_itemp = TRUE;
259 /* Load: reg = memory[(machine_reg) + offset] */
260 spe_lqd(gen->f, reg, gen->constants_reg, offset * 16);
261 }
262 break;
263 default:
264 assert(0);
265 }
266 }
267
268 /*
269 * Handle absolute value, negate or set-negative of src register.
270 */
271 sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel);
272 if (sign_op != TGSI_UTIL_SIGN_KEEP) {
273 /*
274 * All sign ops are done by manipulating bit 31, the IEEE float sign bit.
275 */
276 const int bit31mask_reg = get_itemp(gen);
277 int result_reg;
278
279 if (reg_is_itemp) {
280 /* re-use 'reg' for the result */
281 result_reg = reg;
282 }
283 else {
284 /* alloc a new reg for the result */
285 result_reg = get_itemp(gen);
286 }
287
288 /* mask with bit 31 set, the rest cleared */
289 spe_load_uint(gen->f, bit31mask_reg, (1 << 31));
290
291 if (sign_op == TGSI_UTIL_SIGN_CLEAR) {
292 spe_andc(gen->f, result_reg, reg, bit31mask_reg);
293 }
294 else if (sign_op == TGSI_UTIL_SIGN_SET) {
295 spe_and(gen->f, result_reg, reg, bit31mask_reg);
296 }
297 else {
298 assert(sign_op == TGSI_UTIL_SIGN_TOGGLE);
299 spe_xor(gen->f, result_reg, reg, bit31mask_reg);
300 }
301
302 reg = result_reg;
303 }
304
305 return reg;
306 }
307
308
309 /**
310 * Return the index of an SPE register to use for the given TGSI register.
311 * If the TGSI register is TGSI_FILE_TEMPORARAY, the index of the
312 * corresponding SPE register is returned. If the TGSI register is
313 * TGSI_FILE_OUTPUT we allocate an intermediate temporary register.
314 * See store_dest_reg() below...
315 */
316 static int
317 get_dst_reg(struct codegen *gen,
318 int channel,
319 const struct tgsi_full_dst_register *dest)
320 {
321 int reg = -1;
322
323 switch (dest->DstRegister.File) {
324 case TGSI_FILE_TEMPORARY:
325 if (gen->if_nesting > 0)
326 reg = get_itemp(gen);
327 else
328 reg = gen->temp_regs[dest->DstRegister.Index][channel];
329 break;
330 case TGSI_FILE_OUTPUT:
331 reg = get_itemp(gen);
332 break;
333 default:
334 assert(0);
335 }
336
337 return reg;
338 }
339
340
341 /**
342 * When a TGSI instruction is writing to an output register, this
343 * function emits the SPE store instruction to store the value_reg.
344 * \param value_reg the SPE register containing the value to store.
345 * This would have been returned by get_dst_reg().
346 */
347 static void
348 store_dest_reg(struct codegen *gen,
349 int value_reg, int channel,
350 const struct tgsi_full_dst_register *dest)
351 {
352 /*
353 * XXX need to implement dst reg clamping/saturation
354 */
355 #if 0
356 switch (inst->Instruction.Saturate) {
357 case TGSI_SAT_NONE:
358 break;
359 case TGSI_SAT_ZERO_ONE:
360 break;
361 case TGSI_SAT_MINUS_PLUS_ONE:
362 break;
363 default:
364 assert( 0 );
365 }
366 #endif
367
368 switch (dest->DstRegister.File) {
369 case TGSI_FILE_TEMPORARY:
370 if (gen->if_nesting > 0) {
371 int d_reg = gen->temp_regs[dest->DstRegister.Index][channel];
372 int exec_reg = get_exec_mask_reg(gen);
373 /* Mix d with new value according to exec mask:
374 * d[i] = mask_reg[i] ? value_reg : d_reg
375 */
376 spe_selb(gen->f, d_reg, d_reg, value_reg, exec_reg);
377 }
378 else {
379 /* we're not inside a condition or loop: do nothing special */
380
381 }
382 break;
383 case TGSI_FILE_OUTPUT:
384 {
385 /* offset is measured in quadwords, not bytes */
386 int offset = dest->DstRegister.Index * 4 + channel;
387 if (gen->if_nesting > 0) {
388 int exec_reg = get_exec_mask_reg(gen);
389 int curval_reg = get_itemp(gen);
390 /* First read the current value from memory:
391 * Load: curval = memory[(machine_reg) + offset]
392 */
393 spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset * 16);
394 /* Mix curval with newvalue according to exec mask:
395 * d[i] = mask_reg[i] ? value_reg : d_reg
396 */
397 spe_selb(gen->f, curval_reg, curval_reg, value_reg, exec_reg);
398 /* Store: memory[(machine_reg) + offset] = curval */
399 spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset * 16);
400 }
401 else {
402 /* Store: memory[(machine_reg) + offset] = reg */
403 spe_stqd(gen->f, value_reg, gen->outputs_reg, offset * 16);
404 }
405 }
406 break;
407 default:
408 assert(0);
409 }
410 }
411
412
413
414 static void
415 emit_prologue(struct codegen *gen)
416 {
417 gen->frame_size = 1024; /* XXX temporary, should be dynamic */
418
419 spe_comment(gen->f, -4, "Function prologue:");
420
421 /* save $lr on stack # stqd $lr,16($sp) */
422 spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16);
423
424 if (gen->frame_size >= 512) {
425 /* offset is too large for ai instruction */
426 int offset_reg = spe_allocate_available_register(gen->f);
427 int sp_reg = spe_allocate_available_register(gen->f);
428 /* offset = -framesize */
429 spe_load_int(gen->f, offset_reg, -gen->frame_size);
430 /* sp = $sp */
431 spe_move(gen->f, sp_reg, SPE_REG_SP);
432 /* $sp = $sp + offset_reg */
433 spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg);
434 /* save $sp in stack frame */
435 spe_stqd(gen->f, sp_reg, SPE_REG_SP, 0);
436 /* clean up */
437 spe_release_register(gen->f, offset_reg);
438 spe_release_register(gen->f, sp_reg);
439 }
440 else {
441 /* save stack pointer # stqd $sp,-frameSize($sp) */
442 spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size);
443
444 /* adjust stack pointer # ai $sp,$sp,-frameSize */
445 spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size);
446 }
447 }
448
449
450 static void
451 emit_epilogue(struct codegen *gen)
452 {
453 const int return_reg = 3;
454
455 spe_comment(gen->f, -4, "Function epilogue:");
456
457 spe_comment(gen->f, 0, "return the killed mask");
458 if (gen->kill_mask_reg > 0) {
459 /* shader called KIL, return the "alive" mask */
460 spe_move(gen->f, return_reg, gen->kill_mask_reg);
461 }
462 else {
463 /* return {0,0,0,0} */
464 spe_load_uint(gen->f, return_reg, 0);
465 }
466
467 spe_comment(gen->f, 0, "restore stack and return");
468 if (gen->frame_size >= 512) {
469 /* offset is too large for ai instruction */
470 int offset_reg = spe_allocate_available_register(gen->f);
471 /* offset = framesize */
472 spe_load_int(gen->f, offset_reg, gen->frame_size);
473 /* $sp = $sp + offset */
474 spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg);
475 /* clean up */
476 spe_release_register(gen->f, offset_reg);
477 }
478 else {
479 /* restore stack pointer # ai $sp,$sp,frameSize */
480 spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, gen->frame_size);
481 }
482
483 /* restore $lr # lqd $lr,16($sp) */
484 spe_lqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16);
485
486 /* return from function call */
487 spe_bi(gen->f, SPE_REG_RA, 0, 0);
488 }
489
490
491 static boolean
492 emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst)
493 {
494 int ch, src_reg[4], dst_reg[4];
495
496 spe_comment(gen->f, -4, "MOV:");
497 for (ch = 0; ch < 4; ch++) {
498 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
499 src_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
500 dst_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
501 }
502 }
503
504 for (ch = 0; ch < 4; ch++) {
505 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
506 if (is_register_src(gen, ch, &inst->FullSrcRegisters[0]) &&
507 is_memory_dst(gen, ch, &inst->FullDstRegisters[0])) {
508 /* special-case: register to memory store */
509 store_dest_reg(gen, src_reg[ch], ch, &inst->FullDstRegisters[0]);
510 }
511 else {
512 spe_move(gen->f, dst_reg[ch], src_reg[ch]);
513 store_dest_reg(gen, dst_reg[ch], ch, &inst->FullDstRegisters[0]);
514 }
515 free_itemps(gen);
516 }
517 }
518 return true;
519 }
520
521 /**
522 * Emit addition instructions. Recall that a single TGSI_OPCODE_ADD
523 * becomes (up to) four SPU "fa" instructions because we're doing SOA
524 * processing.
525 */
526 static boolean
527 emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst)
528 {
529 int ch, s1_reg[4], s2_reg[4], d_reg[4];
530
531 spe_comment(gen->f, -4, "ADD:");
532 /* Loop over Red/Green/Blue/Alpha channels, fetch src operands */
533 for (ch = 0; ch < 4; ch++) {
534 /* If the dest R, G, B or A writemask is enabled... */
535 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
536 s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
537 s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
538 d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
539 }
540 }
541 /* Loop over Red/Green/Blue/Alpha channels, do the add, store results */
542 for (ch = 0; ch < 4; ch++) {
543 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
544 /* Emit actual SPE instruction: d = s1 + s2 */
545 spe_fa(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
546 /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
547 store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
548 /* Free any intermediate temps we allocated */
549 free_itemps(gen);
550 }
551 }
552 return true;
553 }
554
555 /**
556 * Emit subtract. See emit_ADD for comments.
557 */
558 static boolean
559 emit_SUB(struct codegen *gen, const struct tgsi_full_instruction *inst)
560 {
561 int ch, s1_reg[4], s2_reg[4], d_reg[4];
562 spe_comment(gen->f, -4, "SUB:");
563 for (ch = 0; ch < 4; ch++) {
564 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
565 s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
566 s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
567 d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
568 }
569 }
570 for (ch = 0; ch < 4; ch++) {
571 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
572 /* d = s1 - s2 */
573 spe_fs(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
574 store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
575 free_itemps(gen);
576 }
577 }
578 return true;
579 }
580
581 /**
582 * Emit multiply add. See emit_ADD for comments.
583 */
584 static boolean
585 emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst)
586 {
587 int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4];
588 spe_comment(gen->f, -4, "MAD:");
589 for (ch = 0; ch < 4; ch++) {
590 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
591 s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
592 s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
593 s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
594 d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
595 }
596 }
597 for (ch = 0; ch < 4; ch++) {
598 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
599 /* d = s1 * s2 + s3 */
600 spe_fma(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch], s3_reg[ch]);
601 store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
602 free_itemps(gen);
603 }
604 }
605 return true;
606 }
607
608
609 /**
610 * Emit linear interpolate. See emit_ADD for comments.
611 */
612 static boolean
613 emit_LERP(struct codegen *gen, const struct tgsi_full_instruction *inst)
614 {
615 int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4], tmp_reg[4];
616 spe_comment(gen->f, -4, "LERP:");
617 /* setup/get src/dst/temp regs */
618 for (ch = 0; ch < 4; ch++) {
619 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
620 s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
621 s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
622 s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
623 d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
624 tmp_reg[ch] = get_itemp(gen);
625 }
626 }
627
628 /* d = s3 + s1(s2 - s3) */
629 /* do all subtracts, then all fma, then all stores to better pipeline */
630 for (ch = 0; ch < 4; ch++) {
631 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
632 spe_fs(gen->f, tmp_reg[ch], s2_reg[ch], s3_reg[ch]);
633 }
634 }
635 for (ch = 0; ch < 4; ch++) {
636 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
637 spe_fma(gen->f, d_reg[ch], tmp_reg[ch], s1_reg[ch], s3_reg[ch]);
638 }
639 }
640 for (ch = 0; ch < 4; ch++) {
641 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
642 store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
643 }
644 }
645 free_itemps(gen);
646 return true;
647 }
648
649 /**
650 * Emit multiply. See emit_ADD for comments.
651 */
652 static boolean
653 emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst)
654 {
655 int ch, s1_reg[4], s2_reg[4], d_reg[4];
656 spe_comment(gen->f, -4, "MUL:");
657 for (ch = 0; ch < 4; ch++) {
658 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
659 s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
660 s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
661 d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
662 }
663 }
664 for (ch = 0; ch < 4; ch++) {
665 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
666 /* d = s1 * s2 */
667 spe_fm(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
668 store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
669 free_itemps(gen);
670 }
671 }
672 return true;
673 }
674
675 /**
676 * Emit reciprocal. See emit_ADD for comments.
677 */
678 static boolean
679 emit_RCP(struct codegen *gen, const struct tgsi_full_instruction *inst)
680 {
681 int ch;
682 spe_comment(gen->f, -4, "RCP:");
683 for (ch = 0; ch < 4; ch++) {
684 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
685 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
686 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
687 /* d = 1/s1 */
688 spe_frest(gen->f, d_reg, s1_reg);
689 spe_fi(gen->f, d_reg, s1_reg, d_reg);
690 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
691 free_itemps(gen);
692 }
693 }
694 return true;
695 }
696
697 /**
698 * Emit reciprocal sqrt. See emit_ADD for comments.
699 */
700 static boolean
701 emit_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst)
702 {
703 int ch;
704 spe_comment(gen->f, -4, "RSQ:");
705 for (ch = 0; ch < 4; ch++) {
706 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
707 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
708 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
709 /* d = 1/s1 */
710 spe_frsqest(gen->f, d_reg, s1_reg);
711 spe_fi(gen->f, d_reg, s1_reg, d_reg);
712 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
713 free_itemps(gen);
714 }
715 }
716 return true;
717 }
718
719 /**
720 * Emit absolute value. See emit_ADD for comments.
721 */
722 static boolean
723 emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst)
724 {
725 int ch;
726 spe_comment(gen->f, -4, "ABS:");
727 for (ch = 0; ch < 4; ch++) {
728 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
729 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
730 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
731 const int bit31mask_reg = get_itemp(gen);
732
733 /* mask with bit 31 set, the rest cleared */
734 spe_load_uint(gen->f, bit31mask_reg, (1 << 31));
735
736 /* d = sign bit cleared in s1 */
737 spe_andc(gen->f, d_reg, s1_reg, bit31mask_reg);
738
739 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
740 free_itemps(gen);
741 }
742 }
743 return true;
744 }
745
746 /**
747 * Emit 3 component dot product. See emit_ADD for comments.
748 */
749 static boolean
750 emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst)
751 {
752 int ch;
753 int s1x_reg, s1y_reg, s1z_reg;
754 int s2x_reg, s2y_reg, s2z_reg;
755 int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
756
757 spe_comment(gen->f, -4, "DP3:");
758
759 s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
760 s2x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
761 s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
762 s2y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
763 s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
764 s2z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
765
766 /* t0 = x0 * x1 */
767 spe_fm(gen->f, t0_reg, s1x_reg, s2x_reg);
768
769 /* t1 = y0 * y1 */
770 spe_fm(gen->f, t1_reg, s1y_reg, s2y_reg);
771
772 /* t0 = z0 * z1 + t0 */
773 spe_fma(gen->f, t0_reg, s1z_reg, s2z_reg, t0_reg);
774
775 /* t0 = t0 + t1 */
776 spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
777
778 for (ch = 0; ch < 4; ch++) {
779 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
780 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
781 spe_move(gen->f, d_reg, t0_reg);
782 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
783 }
784 }
785
786 free_itemps(gen);
787 return true;
788 }
789
790 /**
791 * Emit 4 component dot product. See emit_ADD for comments.
792 */
793 static boolean
794 emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst)
795 {
796 int ch;
797 int s0x_reg, s0y_reg, s0z_reg, s0w_reg;
798 int s1x_reg, s1y_reg, s1z_reg, s1w_reg;
799 int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
800
801 spe_comment(gen->f, -4, "DP4:");
802
803 s0x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
804 s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
805 s0y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
806 s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
807 s0z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
808 s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
809 s0w_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[0]);
810 s1w_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]);
811
812 /* t0 = x0 * x1 */
813 spe_fm(gen->f, t0_reg, s0x_reg, s1x_reg);
814
815 /* t1 = y0 * y1 */
816 spe_fm(gen->f, t1_reg, s0y_reg, s1y_reg);
817
818 /* t0 = z0 * z1 + t0 */
819 spe_fma(gen->f, t0_reg, s0z_reg, s1z_reg, t0_reg);
820
821 /* t1 = w0 * w1 + t1 */
822 spe_fma(gen->f, t1_reg, s0w_reg, s1w_reg, t1_reg);
823
824 /* t0 = t0 + t1 */
825 spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
826
827 for (ch = 0; ch < 4; ch++) {
828 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
829 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
830 spe_move(gen->f, d_reg, t0_reg);
831 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
832 }
833 }
834
835 free_itemps(gen);
836 return true;
837 }
838
839 /**
840 * Emit homogeneous dot product. See emit_ADD for comments.
841 */
842 static boolean
843 emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst)
844 {
845 /* XXX rewrite this function to look more like DP3/DP4 */
846 int ch;
847 spe_comment(gen->f, -4, "DPH:");
848
849 int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
850 int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
851 int tmp_reg = get_itemp(gen);
852
853 /* t = x0 * x1 */
854 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
855
856 s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
857 s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
858 /* t = y0 * y1 + t */
859 spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
860
861 s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
862 s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
863 /* t = z0 * z1 + t */
864 spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
865
866 s2_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]);
867 /* t = w1 + t */
868 spe_fa(gen->f, tmp_reg, s2_reg, tmp_reg);
869
870 for (ch = 0; ch < 4; ch++) {
871 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
872 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
873 spe_move(gen->f, d_reg, tmp_reg);
874 store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]);
875 }
876 }
877
878 free_itemps(gen);
879 return true;
880 }
881
882 /**
883 * Emit 3-component vector normalize.
884 */
885 static boolean
886 emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst)
887 {
888 int ch;
889 int src_reg[3];
890 int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
891
892 spe_comment(gen->f, -4, "NRM3:");
893
894 src_reg[0] = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
895 src_reg[1] = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
896 src_reg[2] = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
897
898 /* t0 = x * x */
899 spe_fm(gen->f, t0_reg, src_reg[0], src_reg[0]);
900
901 /* t1 = y * y */
902 spe_fm(gen->f, t1_reg, src_reg[1], src_reg[1]);
903
904 /* t0 = z * z + t0 */
905 spe_fma(gen->f, t0_reg, src_reg[2], src_reg[2], t0_reg);
906
907 /* t0 = t0 + t1 */
908 spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
909
910 /* t1 = 1.0 / sqrt(t0) */
911 spe_frsqest(gen->f, t1_reg, t0_reg);
912 spe_fi(gen->f, t1_reg, t0_reg, t1_reg);
913
914 for (ch = 0; ch < 3; ch++) { /* NOTE: omit W channel */
915 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
916 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
917 /* dst = src[ch] * t1 */
918 spe_fm(gen->f, d_reg, src_reg[ch], t1_reg);
919 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
920 }
921 }
922
923 free_itemps(gen);
924 return true;
925 }
926
927
928 /**
929 * Emit cross product. See emit_ADD for comments.
930 */
931 static boolean
932 emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst)
933 {
934 spe_comment(gen->f, -4, "XPD:");
935
936 int s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
937 int s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
938 int tmp_reg = get_itemp(gen);
939
940 /* t = z0 * y1 */
941 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
942
943 s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
944 s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
945 /* t = y0 * z1 - t */
946 spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
947
948 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_X)) {
949 store_dest_reg(gen, tmp_reg, CHAN_X, &inst->FullDstRegisters[0]);
950 }
951
952 s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
953 s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
954 /* t = x0 * z1 */
955 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
956
957 s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
958 s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
959 /* t = z0 * x1 - t */
960 spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
961
962 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Y)) {
963 store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->FullDstRegisters[0]);
964 }
965
966 s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
967 s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
968 /* t = y0 * x1 */
969 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
970
971 s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
972 s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
973 /* t = x0 * y1 - t */
974 spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
975
976 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Z)) {
977 store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->FullDstRegisters[0]);
978 }
979
980 free_itemps(gen);
981 return true;
982 }
983
984 /**
985 * Emit set-if-greater-than.
986 * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as
987 * the result but OpenGL/TGSI needs 0.0 and 1.0 results.
988 * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND.
989 */
990 static boolean
991 emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst)
992 {
993 int ch;
994
995 spe_comment(gen->f, -4, "SGT:");
996
997 for (ch = 0; ch < 4; ch++) {
998 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
999 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1000 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
1001 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1002
1003 /* d = (s1 > s2) */
1004 spe_fcgt(gen->f, d_reg, s1_reg, s2_reg);
1005
1006 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
1007 /* d = d & one_reg */
1008 spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
1009
1010 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1011 free_itemps(gen);
1012 }
1013 }
1014
1015 return true;
1016 }
1017
1018 /**
1019 * Emit set-if_less-then. See emit_SGT for comments.
1020 */
1021 static boolean
1022 emit_SLT(struct codegen *gen, const struct tgsi_full_instruction *inst)
1023 {
1024 int ch;
1025
1026 spe_comment(gen->f, -4, "SLT:");
1027
1028 for (ch = 0; ch < 4; ch++) {
1029 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1030 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1031 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
1032 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1033
1034 /* d = (s1 < s2) */
1035 spe_fcgt(gen->f, d_reg, s2_reg, s1_reg);
1036
1037 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
1038 /* d = d & one_reg */
1039 spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
1040
1041 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1042 free_itemps(gen);
1043 }
1044 }
1045
1046 return true;
1047 }
1048
1049 /**
1050 * Emit set-if_greater-then-or-equal. See emit_SGT for comments.
1051 */
1052 static boolean
1053 emit_SGE(struct codegen *gen, const struct tgsi_full_instruction *inst)
1054 {
1055 int ch;
1056
1057 spe_comment(gen->f, -4, "SGE:");
1058
1059 for (ch = 0; ch < 4; ch++) {
1060 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1061 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1062 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
1063 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1064
1065 /* d = (s1 >= s2) */
1066 spe_fcgt(gen->f, d_reg, s2_reg, s1_reg);
1067
1068 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
1069 /* d = ~d & one_reg */
1070 spe_andc(gen->f, d_reg, get_const_one_reg(gen), d_reg);
1071
1072 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1073 free_itemps(gen);
1074 }
1075 }
1076
1077 return true;
1078 }
1079
1080 /**
1081 * Emit set-if_less-then-or-equal. See emit_SGT for comments.
1082 */
1083 static boolean
1084 emit_SLE(struct codegen *gen, const struct tgsi_full_instruction *inst)
1085 {
1086 int ch;
1087
1088 spe_comment(gen->f, -4, "SLE:");
1089
1090 for (ch = 0; ch < 4; ch++) {
1091 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1092 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1093 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
1094 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1095
1096 /* d = (s1 <= s2) */
1097 spe_fcgt(gen->f, d_reg, s1_reg, s2_reg);
1098
1099 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
1100 /* d = ~d & one_reg */
1101 spe_andc(gen->f, d_reg, get_const_one_reg(gen), d_reg);
1102
1103 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1104 free_itemps(gen);
1105 }
1106 }
1107
1108 return true;
1109 }
1110
1111 /**
1112 * Emit set-if_equal. See emit_SGT for comments.
1113 */
1114 static boolean
1115 emit_SEQ(struct codegen *gen, const struct tgsi_full_instruction *inst)
1116 {
1117 int ch;
1118
1119 spe_comment(gen->f, -4, "SEQ:");
1120
1121 for (ch = 0; ch < 4; ch++) {
1122 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1123 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1124 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
1125 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1126
1127 /* d = (s1 == s2) */
1128 spe_fceq(gen->f, d_reg, s1_reg, s2_reg);
1129
1130 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
1131 /* d = d & one_reg */
1132 spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
1133
1134 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1135 free_itemps(gen);
1136 }
1137 }
1138
1139 return true;
1140 }
1141
1142 /**
1143 * Emit set-if_not_equal. See emit_SGT for comments.
1144 */
1145 static boolean
1146 emit_SNE(struct codegen *gen, const struct tgsi_full_instruction *inst)
1147 {
1148 int ch;
1149
1150 spe_comment(gen->f, -4, "SNE:");
1151
1152 for (ch = 0; ch < 4; ch++) {
1153 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1154 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1155 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
1156 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1157
1158 /* d = (s1 != s2) */
1159 spe_fceq(gen->f, d_reg, s1_reg, s2_reg);
1160 spe_nor(gen->f, d_reg, d_reg, d_reg);
1161
1162 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
1163 /* d = d & one_reg */
1164 spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
1165
1166 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1167 free_itemps(gen);
1168 }
1169 }
1170
1171 return true;
1172 }
1173
1174 /**
1175 * Emit compare. See emit_SGT for comments.
1176 */
1177 static boolean
1178 emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst)
1179 {
1180 int ch;
1181
1182 spe_comment(gen->f, -4, "CMP:");
1183
1184 for (ch = 0; ch < 4; ch++) {
1185 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1186 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1187 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
1188 int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
1189 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1190 int zero_reg = get_itemp(gen);
1191
1192 spe_xor(gen->f, zero_reg, zero_reg, zero_reg);
1193
1194 /* d = (s1 < 0) ? s2 : s3 */
1195 spe_fcgt(gen->f, d_reg, zero_reg, s1_reg);
1196 spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg);
1197
1198 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1199 free_itemps(gen);
1200 }
1201 }
1202
1203 return true;
1204 }
1205
1206 /**
1207 * Emit trunc.
1208 * Convert float to signed int
1209 * Convert signed int to float
1210 */
1211 static boolean
1212 emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst)
1213 {
1214 int ch;
1215
1216 spe_comment(gen->f, -4, "TRUNC:");
1217
1218 for (ch = 0; ch < 4; ch++) {
1219 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1220 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1221 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1222
1223 /* Convert float to int */
1224 spe_cflts(gen->f, d_reg, s1_reg, 0);
1225
1226 /* Convert int to float */
1227 spe_csflt(gen->f, d_reg, d_reg, 0);
1228
1229 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1230 free_itemps(gen);
1231 }
1232 }
1233
1234 return true;
1235 }
1236
1237 /**
1238 * Emit floor.
1239 * If negative int subtract one
1240 * Convert float to signed int
1241 * Convert signed int to float
1242 */
1243 static boolean
1244 emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst)
1245 {
1246 int ch;
1247
1248 spe_comment(gen->f, -4, "FLR:");
1249
1250 int zero_reg = get_itemp(gen);
1251 spe_xor(gen->f, zero_reg, zero_reg, zero_reg);
1252
1253 for (ch = 0; ch < 4; ch++) {
1254 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1255 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1256 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1257 int tmp_reg = get_itemp(gen);
1258
1259 /* If negative, subtract 1.0 */
1260 spe_fcgt(gen->f, tmp_reg, zero_reg, s1_reg);
1261 spe_selb(gen->f, tmp_reg, zero_reg, get_const_one_reg(gen), tmp_reg);
1262 spe_fs(gen->f, tmp_reg, s1_reg, tmp_reg);
1263
1264 /* Convert float to int */
1265 spe_cflts(gen->f, tmp_reg, tmp_reg, 0);
1266
1267 /* Convert int to float */
1268 spe_csflt(gen->f, d_reg, tmp_reg, 0);
1269
1270 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1271 free_itemps(gen);
1272 }
1273 }
1274
1275 return true;
1276 }
1277
1278 /**
1279 * Compute frac = Input - FLR(Input)
1280 */
1281 static boolean
1282 emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst)
1283 {
1284 int ch;
1285
1286 spe_comment(gen->f, -4, "FRC:");
1287
1288 int zero_reg = get_itemp(gen);
1289 spe_xor(gen->f, zero_reg, zero_reg, zero_reg);
1290
1291 for (ch = 0; ch < 4; ch++) {
1292 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1293 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1294 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1295 int tmp_reg = get_itemp(gen);
1296
1297 /* If negative, subtract 1.0 */
1298 spe_fcgt(gen->f, tmp_reg, zero_reg, s1_reg);
1299 spe_selb(gen->f, tmp_reg, zero_reg, get_const_one_reg(gen), tmp_reg);
1300 spe_fs(gen->f, tmp_reg, s1_reg, tmp_reg);
1301
1302 /* Convert float to int */
1303 spe_cflts(gen->f, tmp_reg, tmp_reg, 0);
1304
1305 /* Convert int to float */
1306 spe_csflt(gen->f, tmp_reg, tmp_reg, 0);
1307
1308 /* d = s1 - FLR(s1) */
1309 spe_fs(gen->f, d_reg, s1_reg, tmp_reg);
1310
1311 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1312 free_itemps(gen);
1313 }
1314 }
1315
1316 return true;
1317 }
1318
1319
#if 0
/**
 * Debug helper (compiled out): print the name and address of every
 * entry in the context's SPU function table.
 */
static void
print_functions(struct cell_context *cell)
{
   struct cell_spu_function_info *table = &cell->spu_functions;
   uint n = 0;

   while (n < table->num) {
      printf("SPU func %u: %s at %u\n",
             n, table->names[n], table->addrs[n]);
      n++;
   }
}
#endif
1332
1333
1334 static uint
1335 lookup_function(struct cell_context *cell, const char *funcname)
1336 {
1337 const struct cell_spu_function_info *funcs = &cell->spu_functions;
1338 uint i, addr = 0;
1339 for (i = 0; i < funcs->num; i++) {
1340 if (strcmp(funcs->names[i], funcname) == 0) {
1341 addr = funcs->addrs[i];
1342 }
1343 }
1344 assert(addr && "spu function not found");
1345 return addr / 4; /* discard 2 least significant bits */
1346 }
1347
1348
1349 /**
1350 * Emit code to call a SPU function.
1351 * Used to implement instructions like SIN/COS/POW/TEX/etc.
1352 * If scalar, only the X components of the src regs are used, and the
1353 * result is replicated across the dest register's XYZW components.
1354 */
1355 static boolean
1356 emit_function_call(struct codegen *gen,
1357 const struct tgsi_full_instruction *inst,
1358 char *funcname, uint num_args, boolean scalar)
1359 {
1360 const uint addr = lookup_function(gen->cell, funcname);
1361 char comment[100];
1362 int s_regs[3];
1363 int func_called = FALSE;
1364 uint a, ch;
1365 int retval_reg = -1;
1366
1367 assert(num_args <= 3);
1368
1369 snprintf(comment, sizeof(comment), "CALL %s:", funcname);
1370 spe_comment(gen->f, -4, comment);
1371
1372 if (scalar) {
1373 for (a = 0; a < num_args; a++) {
1374 s_regs[a] = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[a]);
1375 }
1376 /* we'll call the function, put the return value in this register,
1377 * then replicate it across all write-enabled components in d_reg.
1378 */
1379 retval_reg = spe_allocate_available_register(gen->f);
1380 }
1381
1382 for (ch = 0; ch < 4; ch++) {
1383 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1384 int d_reg;
1385 ubyte usedRegs[SPE_NUM_REGS];
1386 uint i, numUsed;
1387
1388 if (!scalar) {
1389 for (a = 0; a < num_args; a++) {
1390 s_regs[a] = get_src_reg(gen, ch, &inst->FullSrcRegisters[a]);
1391 }
1392 }
1393
1394 d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1395
1396 if (!scalar || !func_called) {
1397 /* for a scalar function, we'll really only call the function once */
1398
1399 numUsed = spe_get_registers_used(gen->f, usedRegs);
1400 assert(numUsed < gen->frame_size / 16 - 2);
1401
1402 /* save registers to stack */
1403 for (i = 0; i < numUsed; i++) {
1404 uint reg = usedRegs[i];
1405 int offset = 2 + i;
1406 spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset);
1407 }
1408
1409 /* setup function arguments */
1410 for (a = 0; a < num_args; a++) {
1411 spe_move(gen->f, 3 + a, s_regs[a]);
1412 }
1413
1414 /* branch to function, save return addr */
1415 spe_brasl(gen->f, SPE_REG_RA, addr);
1416
1417 /* save function's return value */
1418 if (scalar)
1419 spe_move(gen->f, retval_reg, 3);
1420 else
1421 spe_move(gen->f, d_reg, 3);
1422
1423 /* restore registers from stack */
1424 for (i = 0; i < numUsed; i++) {
1425 uint reg = usedRegs[i];
1426 if (reg != d_reg && reg != retval_reg) {
1427 int offset = 2 + i;
1428 spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset);
1429 }
1430 }
1431
1432 func_called = TRUE;
1433 }
1434
1435 if (scalar) {
1436 spe_move(gen->f, d_reg, retval_reg);
1437 }
1438
1439 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1440 free_itemps(gen);
1441 }
1442 }
1443
1444 if (scalar) {
1445 spe_release_register(gen->f, retval_reg);
1446 }
1447
1448 return true;
1449 }
1450
1451
1452 static boolean
1453 emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst)
1454 {
1455 const uint target = inst->InstructionExtTexture.Texture;
1456 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1457 uint addr;
1458 int ch;
1459 int coord_regs[4], d_regs[4];
1460
1461 switch (target) {
1462 case TGSI_TEXTURE_1D:
1463 case TGSI_TEXTURE_2D:
1464 addr = lookup_function(gen->cell, "spu_tex_2d");
1465 break;
1466 case TGSI_TEXTURE_3D:
1467 addr = lookup_function(gen->cell, "spu_tex_3d");
1468 break;
1469 case TGSI_TEXTURE_CUBE:
1470 addr = lookup_function(gen->cell, "spu_tex_cube");
1471 break;
1472 default:
1473 ASSERT(0 && "unsupported texture target");
1474 return FALSE;
1475 }
1476
1477 assert(inst->FullSrcRegisters[1].SrcRegister.File == TGSI_FILE_SAMPLER);
1478
1479 spe_comment(gen->f, -4, "CALL tex:");
1480
1481 /* get src/dst reg info */
1482 for (ch = 0; ch < 4; ch++) {
1483 coord_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1484 d_regs[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1485 }
1486
1487 {
1488 ubyte usedRegs[SPE_NUM_REGS];
1489 uint i, numUsed;
1490
1491 numUsed = spe_get_registers_used(gen->f, usedRegs);
1492 assert(numUsed < gen->frame_size / 16 - 2);
1493
1494 /* save registers to stack */
1495 for (i = 0; i < numUsed; i++) {
1496 uint reg = usedRegs[i];
1497 int offset = 2 + i;
1498 spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset);
1499 }
1500
1501 /* setup function arguments (XXX depends on target) */
1502 for (i = 0; i < 4; i++) {
1503 spe_move(gen->f, 3 + i, coord_regs[i]);
1504 }
1505 spe_load_uint(gen->f, 7, unit); /* sampler unit */
1506
1507 /* branch to function, save return addr */
1508 spe_brasl(gen->f, SPE_REG_RA, addr);
1509
1510 /* save function's return values (four pixel's colors) */
1511 for (i = 0; i < 4; i++) {
1512 spe_move(gen->f, d_regs[i], 3 + i);
1513 }
1514
1515 /* restore registers from stack */
1516 for (i = 0; i < numUsed; i++) {
1517 uint reg = usedRegs[i];
1518 if (reg != d_regs[0] &&
1519 reg != d_regs[1] &&
1520 reg != d_regs[2] &&
1521 reg != d_regs[3]) {
1522 int offset = 2 + i;
1523 spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset);
1524 }
1525 }
1526 }
1527
1528 for (ch = 0; ch < 4; ch++) {
1529 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1530 store_dest_reg(gen, d_regs[ch], ch, &inst->FullDstRegisters[0]);
1531 free_itemps(gen);
1532 }
1533 }
1534
1535 return TRUE;
1536 }
1537
1538
1539 /**
1540 * KILL if any of src reg values are less than zero.
1541 */
1542 static boolean
1543 emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst)
1544 {
1545 int ch;
1546 int s_regs[4], kil_reg = -1, cmp_reg, zero_reg;
1547
1548 spe_comment(gen->f, -4, "CALL kil:");
1549
1550 /* zero = {0,0,0,0} */
1551 zero_reg = get_itemp(gen);
1552 spe_load_uint(gen->f, zero_reg, 0);
1553
1554 cmp_reg = get_itemp(gen);
1555
1556 /* get src regs */
1557 for (ch = 0; ch < 4; ch++) {
1558 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1559 s_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1560 }
1561 }
1562
1563 /* test if any src regs are < 0 */
1564 for (ch = 0; ch < 4; ch++) {
1565 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1566 if (kil_reg >= 0) {
1567 /* cmp = 0 > src ? : ~0 : 0 */
1568 spe_fcgt(gen->f, cmp_reg, zero_reg, s_regs[ch]);
1569 /* kil = kil | cmp */
1570 spe_or(gen->f, kil_reg, kil_reg, cmp_reg);
1571 }
1572 else {
1573 kil_reg = get_itemp(gen);
1574 /* kil = 0 > src ? : ~0 : 0 */
1575 spe_fcgt(gen->f, kil_reg, zero_reg, s_regs[ch]);
1576 }
1577 }
1578 }
1579
1580 if (gen->if_nesting) {
1581 /* may have been a conditional kil */
1582 spe_and(gen->f, kil_reg, kil_reg, gen->exec_mask_reg);
1583 }
1584
1585 /* allocate the kill mask reg if needed */
1586 if (gen->kill_mask_reg <= 0) {
1587 gen->kill_mask_reg = spe_allocate_available_register(gen->f);
1588 spe_move(gen->f, gen->kill_mask_reg, kil_reg);
1589 }
1590 else {
1591 spe_or(gen->f, gen->kill_mask_reg, gen->kill_mask_reg, kil_reg);
1592 }
1593
1594 free_itemps(gen);
1595
1596 return TRUE;
1597 }
1598
1599
1600
1601 /**
1602 * Emit max. See emit_SGT for comments.
1603 */
1604 static boolean
1605 emit_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst)
1606 {
1607 int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4];
1608
1609 spe_comment(gen->f, -4, "MAX:");
1610
1611 for (ch = 0; ch < 4; ch++) {
1612 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1613 s0_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1614 s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
1615 d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1616 tmp_reg[ch] = get_itemp(gen);
1617 }
1618 }
1619
1620 /* d = (s0 > s1) ? s0 : s1 */
1621 for (ch = 0; ch < 4; ch++) {
1622 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1623 spe_fcgt(gen->f, tmp_reg[ch], s0_reg[ch], s1_reg[ch]);
1624 }
1625 }
1626 for (ch = 0; ch < 4; ch++) {
1627 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1628 spe_selb(gen->f, d_reg[ch], s1_reg[ch], s0_reg[ch], tmp_reg[ch]);
1629 }
1630 }
1631
1632 for (ch = 0; ch < 4; ch++) {
1633 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1634 store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
1635 }
1636 }
1637
1638 free_itemps(gen);
1639 return true;
1640 }
1641
1642 /**
1643 * Emit max. See emit_SGT for comments.
1644 */
1645 static boolean
1646 emit_MIN(struct codegen *gen, const struct tgsi_full_instruction *inst)
1647 {
1648 int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4];
1649
1650 spe_comment(gen->f, -4, "MIN:");
1651
1652 for (ch = 0; ch < 4; ch++) {
1653 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1654 s0_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1655 s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
1656 d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1657 tmp_reg[ch] = get_itemp(gen);
1658 }
1659 }
1660
1661 /* d = (s1 > s0) ? s0 : s1 */
1662 for (ch = 0; ch < 4; ch++) {
1663 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1664 spe_fcgt(gen->f, tmp_reg[ch], s1_reg[ch], s0_reg[ch]);
1665 }
1666 }
1667 for (ch = 0; ch < 4; ch++) {
1668 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1669 spe_selb(gen->f, d_reg[ch], s1_reg[ch], s0_reg[ch], tmp_reg[ch]);
1670 }
1671 }
1672
1673 for (ch = 0; ch < 4; ch++) {
1674 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1675 store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
1676 }
1677 }
1678
1679 free_itemps(gen);
1680 return true;
1681 }
1682
1683 static boolean
1684 emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst)
1685 {
1686 const int channel = 0;
1687 const int exec_reg = get_exec_mask_reg(gen);
1688
1689 spe_comment(gen->f, -4, "IF:");
1690
1691 /* update execution mask with the predicate register */
1692 int tmp_reg = get_itemp(gen);
1693 int s1_reg = get_src_reg(gen, channel, &inst->FullSrcRegisters[0]);
1694
1695 /* tmp = (s1_reg == 0) */
1696 spe_ceqi(gen->f, tmp_reg, s1_reg, 0);
1697 /* tmp = !tmp */
1698 spe_complement(gen->f, tmp_reg, tmp_reg);
1699 /* exec_mask = exec_mask & tmp */
1700 spe_and(gen->f, exec_reg, exec_reg, tmp_reg);
1701
1702 gen->if_nesting++;
1703
1704 free_itemps(gen);
1705
1706 return true;
1707 }
1708
1709
1710 static boolean
1711 emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst)
1712 {
1713 const int exec_reg = get_exec_mask_reg(gen);
1714
1715 spe_comment(gen->f, -4, "ELSE:");
1716
1717 /* exec_mask = !exec_mask */
1718 spe_complement(gen->f, exec_reg, exec_reg);
1719
1720 return true;
1721 }
1722
1723
1724 static boolean
1725 emit_ENDIF(struct codegen *gen, const struct tgsi_full_instruction *inst)
1726 {
1727 const int exec_reg = get_exec_mask_reg(gen);
1728
1729 spe_comment(gen->f, -4, "ENDIF:");
1730
1731 /* XXX todo: pop execution mask */
1732
1733 spe_load_int(gen->f, exec_reg, ~0x0);
1734
1735 gen->if_nesting--;
1736 return true;
1737 }
1738
1739
1740 static boolean
1741 emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst,
1742 boolean ddx)
1743 {
1744 int ch;
1745
1746 spe_comment(gen->f, -4, ddx ? "DDX:" : "DDY:");
1747
1748 for (ch = 0; ch < 4; ch++) {
1749 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1750 int s_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1751 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1752
1753 int t1_reg = get_itemp(gen);
1754 int t2_reg = get_itemp(gen);
1755
1756 spe_splat_word(gen->f, t1_reg, s_reg, 0); /* upper-left pixel */
1757 if (ddx) {
1758 spe_splat_word(gen->f, t2_reg, s_reg, 1); /* upper-right pixel */
1759 }
1760 else {
1761 spe_splat_word(gen->f, t2_reg, s_reg, 2); /* lower-left pixel */
1762 }
1763 spe_fs(gen->f, d_reg, t2_reg, t1_reg);
1764
1765 free_itemps(gen);
1766 }
1767 }
1768
1769 return true;
1770 }
1771
1772
1773
1774
1775 /**
1776 * Emit END instruction.
1777 * We just return from the shader function at this point.
1778 *
1779 * Note that there may be more code after this that would be
1780 * called by TGSI_OPCODE_CALL.
1781 */
1782 static boolean
1783 emit_END(struct codegen *gen)
1784 {
1785 spe_comment(gen->f, -4, "END:");
1786 emit_epilogue(gen);
1787 return true;
1788 }
1789
1790
1791 /**
1792 * Emit code for the given instruction. Just a big switch stmt.
1793 */
1794 static boolean
1795 emit_instruction(struct codegen *gen,
1796 const struct tgsi_full_instruction *inst)
1797 {
1798 switch (inst->Instruction.Opcode) {
1799 case TGSI_OPCODE_MOV:
1800 case TGSI_OPCODE_SWZ:
1801 return emit_MOV(gen, inst);
1802 case TGSI_OPCODE_MUL:
1803 return emit_MUL(gen, inst);
1804 case TGSI_OPCODE_ADD:
1805 return emit_ADD(gen, inst);
1806 case TGSI_OPCODE_SUB:
1807 return emit_SUB(gen, inst);
1808 case TGSI_OPCODE_MAD:
1809 return emit_MAD(gen, inst);
1810 case TGSI_OPCODE_LERP:
1811 return emit_LERP(gen, inst);
1812 case TGSI_OPCODE_DP3:
1813 return emit_DP3(gen, inst);
1814 case TGSI_OPCODE_DP4:
1815 return emit_DP4(gen, inst);
1816 case TGSI_OPCODE_DPH:
1817 return emit_DPH(gen, inst);
1818 case TGSI_OPCODE_NRM:
1819 return emit_NRM3(gen, inst);
1820 case TGSI_OPCODE_XPD:
1821 return emit_XPD(gen, inst);
1822 case TGSI_OPCODE_RCP:
1823 return emit_RCP(gen, inst);
1824 case TGSI_OPCODE_RSQ:
1825 return emit_RSQ(gen, inst);
1826 case TGSI_OPCODE_ABS:
1827 return emit_ABS(gen, inst);
1828 case TGSI_OPCODE_SGT:
1829 return emit_SGT(gen, inst);
1830 case TGSI_OPCODE_SLT:
1831 return emit_SLT(gen, inst);
1832 case TGSI_OPCODE_SGE:
1833 return emit_SGE(gen, inst);
1834 case TGSI_OPCODE_SLE:
1835 return emit_SLE(gen, inst);
1836 case TGSI_OPCODE_SEQ:
1837 return emit_SEQ(gen, inst);
1838 case TGSI_OPCODE_SNE:
1839 return emit_SNE(gen, inst);
1840 case TGSI_OPCODE_CMP:
1841 return emit_CMP(gen, inst);
1842 case TGSI_OPCODE_MAX:
1843 return emit_MAX(gen, inst);
1844 case TGSI_OPCODE_MIN:
1845 return emit_MIN(gen, inst);
1846 case TGSI_OPCODE_TRUNC:
1847 return emit_TRUNC(gen, inst);
1848 case TGSI_OPCODE_FLR:
1849 return emit_FLR(gen, inst);
1850 case TGSI_OPCODE_FRC:
1851 return emit_FRC(gen, inst);
1852 case TGSI_OPCODE_END:
1853 return emit_END(gen);
1854
1855 case TGSI_OPCODE_COS:
1856 return emit_function_call(gen, inst, "spu_cos", 1, TRUE);
1857 case TGSI_OPCODE_SIN:
1858 return emit_function_call(gen, inst, "spu_sin", 1, TRUE);
1859 case TGSI_OPCODE_POW:
1860 return emit_function_call(gen, inst, "spu_pow", 2, TRUE);
1861 case TGSI_OPCODE_EXPBASE2:
1862 return emit_function_call(gen, inst, "spu_exp2", 1, TRUE);
1863 case TGSI_OPCODE_LOGBASE2:
1864 return emit_function_call(gen, inst, "spu_log2", 1, TRUE);
1865 case TGSI_OPCODE_TEX:
1866 /* fall-through for now */
1867 case TGSI_OPCODE_TXD:
1868 /* fall-through for now */
1869 case TGSI_OPCODE_TXB:
1870 /* fall-through for now */
1871 case TGSI_OPCODE_TXL:
1872 /* fall-through for now */
1873 case TGSI_OPCODE_TXP:
1874 return emit_TEX(gen, inst);
1875 case TGSI_OPCODE_KIL:
1876 return emit_KIL(gen, inst);
1877
1878 case TGSI_OPCODE_IF:
1879 return emit_IF(gen, inst);
1880 case TGSI_OPCODE_ELSE:
1881 return emit_ELSE(gen, inst);
1882 case TGSI_OPCODE_ENDIF:
1883 return emit_ENDIF(gen, inst);
1884
1885 case TGSI_OPCODE_DDX:
1886 return emit_DDX_DDY(gen, inst, true);
1887 case TGSI_OPCODE_DDY:
1888 return emit_DDX_DDY(gen, inst, false);
1889
1890 /* XXX lots more cases to do... */
1891
1892 default:
1893 fprintf(stderr, "Cell: unimplemented TGSI instruction %d!\n",
1894 inst->Instruction.Opcode);
1895 return false;
1896 }
1897
1898 return true;
1899 }
1900
1901
1902
1903 /**
1904 * Emit code for a TGSI immediate value (vector of four floats).
1905 * This involves register allocation and initialization.
1906 * XXX the initialization should be done by a "prepare" stage, not
1907 * per quad execution!
1908 */
1909 static boolean
1910 emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed)
1911 {
1912 int ch;
1913
1914 assert(gen->num_imm < MAX_TEMPS);
1915
1916 spe_comment(gen->f, -4, "IMMEDIATE:");
1917
1918 for (ch = 0; ch < 4; ch++) {
1919 float val = immed->u.ImmediateFloat32[ch].Float;
1920
1921 if (ch > 0 && val == immed->u.ImmediateFloat32[ch - 1].Float) {
1922 /* re-use previous register */
1923 gen->imm_regs[gen->num_imm][ch] = gen->imm_regs[gen->num_imm][ch - 1];
1924 }
1925 else {
1926 int reg = spe_allocate_available_register(gen->f);
1927
1928 if (reg < 0)
1929 return false;
1930
1931 /* update immediate map */
1932 gen->imm_regs[gen->num_imm][ch] = reg;
1933
1934 /* emit initializer instruction */
1935 spe_load_float(gen->f, reg, val);
1936 }
1937 }
1938
1939 gen->num_imm++;
1940
1941 return true;
1942 }
1943
1944
1945
1946 /**
1947 * Emit "code" for a TGSI declaration.
1948 * We only care about TGSI TEMPORARY register declarations at this time.
1949 * For each TGSI TEMPORARY we allocate four SPE registers.
1950 */
1951 static boolean
1952 emit_declaration(struct cell_context *cell,
1953 struct codegen *gen, const struct tgsi_full_declaration *decl)
1954 {
1955 int i, ch;
1956
1957 switch (decl->Declaration.File) {
1958 case TGSI_FILE_TEMPORARY:
1959 for (i = decl->DeclarationRange.First;
1960 i <= decl->DeclarationRange.Last;
1961 i++) {
1962 assert(i < MAX_TEMPS);
1963 for (ch = 0; ch < 4; ch++) {
1964 gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f);
1965 if (gen->temp_regs[i][ch] < 0)
1966 return false; /* out of regs */
1967 }
1968
1969 /* XXX if we run out of SPE registers, we need to spill
1970 * to SPU memory. someday...
1971 */
1972
1973 {
1974 char buf[100];
1975 sprintf(buf, "TGSI temp[%d] maps to SPU regs [$%d $%d $%d $%d]", i,
1976 gen->temp_regs[i][0], gen->temp_regs[i][1],
1977 gen->temp_regs[i][2], gen->temp_regs[i][3]);
1978 spe_comment(gen->f, -4, buf);
1979 }
1980 }
1981 break;
1982 default:
1983 ; /* ignore */
1984 }
1985
1986 return true;
1987 }
1988
1989
1990
1991 /**
1992 * Translate TGSI shader code to SPE instructions. This is done when
1993 * the state tracker gives us a new shader (via pipe->create_fs_state()).
1994 *
1995 * \param cell the rendering context (in)
1996 * \param tokens the TGSI shader (in)
1997 * \param f the generated function (out)
1998 */
1999 boolean
2000 cell_gen_fragment_program(struct cell_context *cell,
2001 const struct tgsi_token *tokens,
2002 struct spe_function *f)
2003 {
2004 struct tgsi_parse_context parse;
2005 struct codegen gen;
2006
2007 memset(&gen, 0, sizeof(gen));
2008 gen.cell = cell;
2009 gen.f = f;
2010
2011 /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
2012 gen.inputs_reg = 3; /* pointer to inputs array */
2013 gen.outputs_reg = 4; /* pointer to outputs array */
2014 gen.constants_reg = 5; /* pointer to constants array */
2015
2016 spe_init_func(f, SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE);
2017 spe_allocate_register(f, gen.inputs_reg);
2018 spe_allocate_register(f, gen.outputs_reg);
2019 spe_allocate_register(f, gen.constants_reg);
2020
2021 if (cell->debug_flags & CELL_DEBUG_ASM) {
2022 spe_print_code(f, true);
2023 spe_indent(f, 8);
2024 printf("Begin %s\n", __FUNCTION__);
2025 tgsi_dump(tokens, 0);
2026 }
2027
2028 tgsi_parse_init(&parse, tokens);
2029
2030 emit_prologue(&gen);
2031
2032 while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) {
2033 tgsi_parse_token(&parse);
2034
2035 switch (parse.FullToken.Token.Type) {
2036 case TGSI_TOKEN_TYPE_IMMEDIATE:
2037 if (!emit_immediate(&gen, &parse.FullToken.FullImmediate))
2038 gen.error = true;
2039 break;
2040
2041 case TGSI_TOKEN_TYPE_DECLARATION:
2042 if (!emit_declaration(cell, &gen, &parse.FullToken.FullDeclaration))
2043 gen.error = true;
2044 break;
2045
2046 case TGSI_TOKEN_TYPE_INSTRUCTION:
2047 if (!emit_instruction(&gen, &parse.FullToken.FullInstruction))
2048 gen.error = true;
2049 break;
2050
2051 default:
2052 assert(0);
2053 }
2054 }
2055
2056 if (gen.error) {
2057 /* terminate the SPE code */
2058 return emit_END(&gen);
2059 }
2060
2061 if (cell->debug_flags & CELL_DEBUG_ASM) {
2062 printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst);
2063 printf("End %s\n", __FUNCTION__);
2064 }
2065
2066 tgsi_parse_free( &parse );
2067
2068 return !gen.error;
2069 }