493ee1a0c9c04a452f9d2fda9235f6a4ca465d5e
[mesa.git] / src / gallium / drivers / cell / ppu / cell_gen_fp.c
1 /**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29
30 /**
31 * Generate SPU fragment program/shader code.
32 *
33 * Note that we generate SOA-style code here. So each TGSI instruction
34 * operates on four pixels (and is translated into four SPU instructions,
35 * generally speaking).
36 *
37 * \author Brian Paul
38 */
39
40 #include <math.h>
41 #include "pipe/p_defines.h"
42 #include "pipe/p_state.h"
43 #include "pipe/p_shader_tokens.h"
44 #include "tgsi/tgsi_parse.h"
45 #include "tgsi/tgsi_util.h"
46 #include "tgsi/tgsi_exec.h"
47 #include "tgsi/tgsi_dump.h"
48 #include "rtasm/rtasm_ppc_spe.h"
49 #include "util/u_memory.h"
50 #include "cell_context.h"
51 #include "cell_gen_fp.h"
52
53
/* Row counts of the temp_regs[] / imm_regs[] tables in struct codegen. */
#define MAX_TEMPS 16
#define MAX_IMMED 8

/* Channel indices used to address the four components of a register. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
61
62 /**
63 * Context needed during code generation.
64 */
65 struct codegen
66 {
67 struct cell_context *cell;
68 int inputs_reg; /**< 1st function parameter */
69 int outputs_reg; /**< 2nd function parameter */
70 int constants_reg; /**< 3rd function parameter */
71 int temp_regs[MAX_TEMPS][4]; /**< maps TGSI temps to SPE registers */
72 int imm_regs[MAX_IMMED][4]; /**< maps TGSI immediates to SPE registers */
73
74 int num_imm; /**< number of immediates */
75
76 int one_reg; /**< register containing {1.0, 1.0, 1.0, 1.0} */
77
78 /** Per-instruction temps / intermediate temps */
79 int num_itemps;
80 int itemps[12];
81
82 /** Current IF/ELSE/ENDIF nesting level */
83 int if_nesting;
84 /** Index of execution mask register */
85 int exec_mask_reg;
86
87 /** KIL mask: indicates which fragments have been killed */
88 int kill_mask_reg;
89
90 int frame_size; /**< Stack frame size, in words */
91
92 struct spe_function *f;
93 boolean error;
94 };
95
96
97 /**
98 * Allocate an intermediate temporary register.
99 */
100 static int
101 get_itemp(struct codegen *gen)
102 {
103 int t = spe_allocate_available_register(gen->f);
104 assert(gen->num_itemps < Elements(gen->itemps));
105 gen->itemps[gen->num_itemps++] = t;
106 return t;
107 }
108
109 /**
110 * Free all intermediate temporary registers. To be called after each
111 * instruction has been emitted.
112 */
113 static void
114 free_itemps(struct codegen *gen)
115 {
116 int i;
117 for (i = 0; i < gen->num_itemps; i++) {
118 spe_release_register(gen->f, gen->itemps[i]);
119 }
120 gen->num_itemps = 0;
121 }
122
123
124 /**
125 * Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}.
126 * The register is allocated and initialized upon the first call.
127 */
128 static int
129 get_const_one_reg(struct codegen *gen)
130 {
131 if (gen->one_reg <= 0) {
132 gen->one_reg = spe_allocate_available_register(gen->f);
133
134 spe_indent(gen->f, 4);
135 spe_comment(gen->f, -4, "INIT CONSTANT 1.0:");
136
137 /* one = {1.0, 1.0, 1.0, 1.0} */
138 spe_load_float(gen->f, gen->one_reg, 1.0f);
139
140 spe_indent(gen->f, -4);
141 }
142
143 return gen->one_reg;
144 }
145
146
147 /**
148 * Return index of the pixel execution mask.
149 * The register is allocated an initialized upon the first call.
150 *
151 * The pixel execution mask controls which pixels in a quad are
152 * modified, according to surrounding conditionals, loops, etc.
153 */
154 static int
155 get_exec_mask_reg(struct codegen *gen)
156 {
157 if (gen->exec_mask_reg <= 0) {
158 gen->exec_mask_reg = spe_allocate_available_register(gen->f);
159
160 spe_indent(gen->f, 4);
161 spe_comment(gen->f, -4, "INIT EXEC MASK = ~0:");
162
163 /* exec_mask = {~0, ~0, ~0, ~0} */
164 spe_load_int(gen->f, gen->exec_mask_reg, ~0);
165
166 spe_indent(gen->f, -4);
167 }
168
169 return gen->exec_mask_reg;
170 }
171
172
173 static boolean
174 is_register_src(struct codegen *gen, int channel,
175 const struct tgsi_full_src_register *src)
176 {
177 int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel);
178 int sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel);
179
180 if (swizzle > TGSI_SWIZZLE_W || sign_op != TGSI_UTIL_SIGN_KEEP) {
181 return FALSE;
182 }
183 if (src->SrcRegister.File == TGSI_FILE_TEMPORARY ||
184 src->SrcRegister.File == TGSI_FILE_IMMEDIATE) {
185 return TRUE;
186 }
187 return FALSE;
188 }
189
190
191 static boolean
192 is_memory_dst(struct codegen *gen, int channel,
193 const struct tgsi_full_dst_register *dst)
194 {
195 if (dst->DstRegister.File == TGSI_FILE_OUTPUT) {
196 return TRUE;
197 }
198 else {
199 return FALSE;
200 }
201 }
202
203
204 /**
205 * Return the index of the SPU temporary containing the named TGSI
206 * source register. If the TGSI register is a TGSI_FILE_TEMPORARY we
207 * just return the corresponding SPE register. If the TGIS register
208 * is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register
209 * and emit an SPE load instruction.
210 */
211 static int
212 get_src_reg(struct codegen *gen,
213 int channel,
214 const struct tgsi_full_src_register *src)
215 {
216 int reg = -1;
217 int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel);
218 boolean reg_is_itemp = FALSE;
219 uint sign_op;
220
221 assert(swizzle >= TGSI_SWIZZLE_X);
222 assert(swizzle <= TGSI_EXTSWIZZLE_ONE);
223
224 if (swizzle == TGSI_EXTSWIZZLE_ONE) {
225 /* Load const one float and early out */
226 reg = get_const_one_reg(gen);
227 }
228 else if (swizzle == TGSI_EXTSWIZZLE_ZERO) {
229 /* Load const zero float and early out */
230 reg = get_itemp(gen);
231 spe_xor(gen->f, reg, reg, reg);
232 }
233 else {
234 assert(swizzle < 4);
235
236 switch (src->SrcRegister.File) {
237 case TGSI_FILE_TEMPORARY:
238 reg = gen->temp_regs[src->SrcRegister.Index][swizzle];
239 break;
240 case TGSI_FILE_INPUT:
241 {
242 /* offset is measured in quadwords, not bytes */
243 int offset = src->SrcRegister.Index * 4 + swizzle;
244 reg = get_itemp(gen);
245 reg_is_itemp = TRUE;
246 /* Load: reg = memory[(machine_reg) + offset] */
247 spe_lqd(gen->f, reg, gen->inputs_reg, offset * 16);
248 }
249 break;
250 case TGSI_FILE_IMMEDIATE:
251 reg = gen->imm_regs[src->SrcRegister.Index][swizzle];
252 break;
253 case TGSI_FILE_CONSTANT:
254 {
255 /* offset is measured in quadwords, not bytes */
256 int offset = src->SrcRegister.Index * 4 + swizzle;
257 reg = get_itemp(gen);
258 reg_is_itemp = TRUE;
259 /* Load: reg = memory[(machine_reg) + offset] */
260 spe_lqd(gen->f, reg, gen->constants_reg, offset * 16);
261 }
262 break;
263 default:
264 assert(0);
265 }
266 }
267
268 /*
269 * Handle absolute value, negate or set-negative of src register.
270 */
271 sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel);
272 if (sign_op != TGSI_UTIL_SIGN_KEEP) {
273 /*
274 * All sign ops are done by manipulating bit 31, the IEEE float sign bit.
275 */
276 const int bit31mask_reg = get_itemp(gen);
277 int result_reg;
278
279 if (reg_is_itemp) {
280 /* re-use 'reg' for the result */
281 result_reg = reg;
282 }
283 else {
284 /* alloc a new reg for the result */
285 result_reg = get_itemp(gen);
286 }
287
288 /* mask with bit 31 set, the rest cleared */
289 spe_load_uint(gen->f, bit31mask_reg, (1 << 31));
290
291 if (sign_op == TGSI_UTIL_SIGN_CLEAR) {
292 spe_andc(gen->f, result_reg, reg, bit31mask_reg);
293 }
294 else if (sign_op == TGSI_UTIL_SIGN_SET) {
295 spe_and(gen->f, result_reg, reg, bit31mask_reg);
296 }
297 else {
298 assert(sign_op == TGSI_UTIL_SIGN_TOGGLE);
299 spe_xor(gen->f, result_reg, reg, bit31mask_reg);
300 }
301
302 reg = result_reg;
303 }
304
305 return reg;
306 }
307
308
309 /**
310 * Return the index of an SPE register to use for the given TGSI register.
311 * If the TGSI register is TGSI_FILE_TEMPORARAY, the index of the
312 * corresponding SPE register is returned. If the TGSI register is
313 * TGSI_FILE_OUTPUT we allocate an intermediate temporary register.
314 * See store_dest_reg() below...
315 */
316 static int
317 get_dst_reg(struct codegen *gen,
318 int channel,
319 const struct tgsi_full_dst_register *dest)
320 {
321 int reg = -1;
322
323 switch (dest->DstRegister.File) {
324 case TGSI_FILE_TEMPORARY:
325 if (gen->if_nesting > 0)
326 reg = get_itemp(gen);
327 else
328 reg = gen->temp_regs[dest->DstRegister.Index][channel];
329 break;
330 case TGSI_FILE_OUTPUT:
331 reg = get_itemp(gen);
332 break;
333 default:
334 assert(0);
335 }
336
337 return reg;
338 }
339
340
341 /**
342 * When a TGSI instruction is writing to an output register, this
343 * function emits the SPE store instruction to store the value_reg.
344 * \param value_reg the SPE register containing the value to store.
345 * This would have been returned by get_dst_reg().
346 */
347 static void
348 store_dest_reg(struct codegen *gen,
349 int value_reg, int channel,
350 const struct tgsi_full_dst_register *dest)
351 {
352 switch (dest->DstRegister.File) {
353 case TGSI_FILE_TEMPORARY:
354 if (gen->if_nesting > 0) {
355 int d_reg = gen->temp_regs[dest->DstRegister.Index][channel];
356 int exec_reg = get_exec_mask_reg(gen);
357 /* Mix d with new value according to exec mask:
358 * d[i] = mask_reg[i] ? value_reg : d_reg
359 */
360 spe_selb(gen->f, d_reg, d_reg, value_reg, exec_reg);
361 }
362 else {
363 /* we're not inside a condition or loop: do nothing special */
364
365 }
366 break;
367 case TGSI_FILE_OUTPUT:
368 {
369 /* offset is measured in quadwords, not bytes */
370 int offset = dest->DstRegister.Index * 4 + channel;
371 if (gen->if_nesting > 0) {
372 int exec_reg = get_exec_mask_reg(gen);
373 int curval_reg = get_itemp(gen);
374 /* First read the current value from memory:
375 * Load: curval = memory[(machine_reg) + offset]
376 */
377 spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset * 16);
378 /* Mix curval with newvalue according to exec mask:
379 * d[i] = mask_reg[i] ? value_reg : d_reg
380 */
381 spe_selb(gen->f, curval_reg, curval_reg, value_reg, exec_reg);
382 /* Store: memory[(machine_reg) + offset] = curval */
383 spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset * 16);
384 }
385 else {
386 /* Store: memory[(machine_reg) + offset] = reg */
387 spe_stqd(gen->f, value_reg, gen->outputs_reg, offset * 16);
388 }
389 }
390 break;
391 default:
392 assert(0);
393 }
394 }
395
396
397
398 static void
399 emit_prologue(struct codegen *gen)
400 {
401 gen->frame_size = 1024; /* XXX temporary, should be dynamic */
402
403 spe_comment(gen->f, -4, "Function prologue:");
404
405 /* save $lr on stack # stqd $lr,16($sp) */
406 spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16);
407
408 if (gen->frame_size >= 512) {
409 /* offset is too large for ai instruction */
410 int offset_reg = spe_allocate_available_register(gen->f);
411 int sp_reg = spe_allocate_available_register(gen->f);
412 /* offset = -framesize */
413 spe_load_int(gen->f, offset_reg, -gen->frame_size);
414 /* sp = $sp */
415 spe_move(gen->f, sp_reg, SPE_REG_SP);
416 /* $sp = $sp + offset_reg */
417 spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg);
418 /* save $sp in stack frame */
419 spe_stqd(gen->f, sp_reg, SPE_REG_SP, 0);
420 /* clean up */
421 spe_release_register(gen->f, offset_reg);
422 spe_release_register(gen->f, sp_reg);
423 }
424 else {
425 /* save stack pointer # stqd $sp,-frameSize($sp) */
426 spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size);
427
428 /* adjust stack pointer # ai $sp,$sp,-frameSize */
429 spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size);
430 }
431 }
432
433
434 static void
435 emit_epilogue(struct codegen *gen)
436 {
437 const int return_reg = 3;
438
439 spe_comment(gen->f, -4, "Function epilogue:");
440
441 spe_comment(gen->f, 0, "return the killed mask");
442 if (gen->kill_mask_reg > 0) {
443 /* shader called KIL, return the "alive" mask */
444 spe_move(gen->f, return_reg, gen->kill_mask_reg);
445 }
446 else {
447 /* return {0,0,0,0} */
448 spe_load_uint(gen->f, return_reg, 0);
449 }
450
451 spe_comment(gen->f, 0, "restore stack and return");
452 if (gen->frame_size >= 512) {
453 /* offset is too large for ai instruction */
454 int offset_reg = spe_allocate_available_register(gen->f);
455 /* offset = framesize */
456 spe_load_int(gen->f, offset_reg, gen->frame_size);
457 /* $sp = $sp + offset */
458 spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg);
459 /* clean up */
460 spe_release_register(gen->f, offset_reg);
461 }
462 else {
463 /* restore stack pointer # ai $sp,$sp,frameSize */
464 spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, gen->frame_size);
465 }
466
467 /* restore $lr # lqd $lr,16($sp) */
468 spe_lqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16);
469
470 /* return from function call */
471 spe_bi(gen->f, SPE_REG_RA, 0, 0);
472 }
473
474
475 static boolean
476 emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst)
477 {
478 int ch, src_reg[4], dst_reg[4];
479
480 spe_comment(gen->f, -4, "MOV:");
481 for (ch = 0; ch < 4; ch++) {
482 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
483 src_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
484 dst_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
485 }
486 }
487
488 for (ch = 0; ch < 4; ch++) {
489 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
490 if (is_register_src(gen, ch, &inst->FullSrcRegisters[0]) &&
491 is_memory_dst(gen, ch, &inst->FullDstRegisters[0])) {
492 /* special-case: register to memory store */
493 store_dest_reg(gen, src_reg[ch], ch, &inst->FullDstRegisters[0]);
494 }
495 else {
496 spe_move(gen->f, dst_reg[ch], src_reg[ch]);
497 store_dest_reg(gen, dst_reg[ch], ch, &inst->FullDstRegisters[0]);
498 }
499 free_itemps(gen);
500 }
501 }
502 return true;
503 }
504
505 /**
506 * Emit addition instructions. Recall that a single TGSI_OPCODE_ADD
507 * becomes (up to) four SPU "fa" instructions because we're doing SOA
508 * processing.
509 */
510 static boolean
511 emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst)
512 {
513 int ch, s1_reg[4], s2_reg[4], d_reg[4];
514
515 spe_comment(gen->f, -4, "ADD:");
516 /* Loop over Red/Green/Blue/Alpha channels, fetch src operands */
517 for (ch = 0; ch < 4; ch++) {
518 /* If the dest R, G, B or A writemask is enabled... */
519 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
520 s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
521 s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
522 d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
523 }
524 }
525 /* Loop over Red/Green/Blue/Alpha channels, do the add, store results */
526 for (ch = 0; ch < 4; ch++) {
527 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
528 /* Emit actual SPE instruction: d = s1 + s2 */
529 spe_fa(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
530 /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
531 store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
532 /* Free any intermediate temps we allocated */
533 free_itemps(gen);
534 }
535 }
536 return true;
537 }
538
539 /**
540 * Emit subtract. See emit_ADD for comments.
541 */
542 static boolean
543 emit_SUB(struct codegen *gen, const struct tgsi_full_instruction *inst)
544 {
545 int ch, s1_reg[4], s2_reg[4], d_reg[4];
546 spe_comment(gen->f, -4, "SUB:");
547 for (ch = 0; ch < 4; ch++) {
548 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
549 s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
550 s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
551 d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
552 }
553 }
554 for (ch = 0; ch < 4; ch++) {
555 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
556 /* d = s1 - s2 */
557 spe_fs(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
558 store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
559 free_itemps(gen);
560 }
561 }
562 return true;
563 }
564
565 /**
566 * Emit multiply add. See emit_ADD for comments.
567 */
568 static boolean
569 emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst)
570 {
571 int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4];
572 spe_comment(gen->f, -4, "MAD:");
573 for (ch = 0; ch < 4; ch++) {
574 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
575 s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
576 s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
577 s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
578 d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
579 }
580 }
581 for (ch = 0; ch < 4; ch++) {
582 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
583 /* d = s1 * s2 + s3 */
584 spe_fma(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch], s3_reg[ch]);
585 store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
586 free_itemps(gen);
587 }
588 }
589 return true;
590 }
591
592
593 /**
594 * Emit linear interpolate. See emit_ADD for comments.
595 */
596 static boolean
597 emit_LERP(struct codegen *gen, const struct tgsi_full_instruction *inst)
598 {
599 int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4], tmp_reg[4];
600 spe_comment(gen->f, -4, "LERP:");
601 /* setup/get src/dst/temp regs */
602 for (ch = 0; ch < 4; ch++) {
603 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
604 s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
605 s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
606 s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
607 d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
608 tmp_reg[ch] = get_itemp(gen);
609 }
610 }
611
612 /* d = s3 + s1(s2 - s3) */
613 /* do all subtracts, then all fma, then all stores to better pipeline */
614 for (ch = 0; ch < 4; ch++) {
615 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
616 spe_fs(gen->f, tmp_reg[ch], s2_reg[ch], s3_reg[ch]);
617 }
618 }
619 for (ch = 0; ch < 4; ch++) {
620 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
621 spe_fma(gen->f, d_reg[ch], tmp_reg[ch], s1_reg[ch], s3_reg[ch]);
622 }
623 }
624 for (ch = 0; ch < 4; ch++) {
625 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
626 store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
627 }
628 }
629 free_itemps(gen);
630 return true;
631 }
632
633 /**
634 * Emit multiply. See emit_ADD for comments.
635 */
636 static boolean
637 emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst)
638 {
639 int ch, s1_reg[4], s2_reg[4], d_reg[4];
640 spe_comment(gen->f, -4, "MUL:");
641 for (ch = 0; ch < 4; ch++) {
642 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
643 s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
644 s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
645 d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
646 }
647 }
648 for (ch = 0; ch < 4; ch++) {
649 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
650 /* d = s1 * s2 */
651 spe_fm(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]);
652 store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
653 free_itemps(gen);
654 }
655 }
656 return true;
657 }
658
659 /**
660 * Emit reciprocal. See emit_ADD for comments.
661 */
662 static boolean
663 emit_RCP(struct codegen *gen, const struct tgsi_full_instruction *inst)
664 {
665 int ch;
666 spe_comment(gen->f, -4, "RCP:");
667 for (ch = 0; ch < 4; ch++) {
668 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
669 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
670 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
671 /* d = 1/s1 */
672 spe_frest(gen->f, d_reg, s1_reg);
673 spe_fi(gen->f, d_reg, s1_reg, d_reg);
674 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
675 free_itemps(gen);
676 }
677 }
678 return true;
679 }
680
681 /**
682 * Emit reciprocal sqrt. See emit_ADD for comments.
683 */
684 static boolean
685 emit_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst)
686 {
687 int ch;
688 spe_comment(gen->f, -4, "RSQ:");
689 for (ch = 0; ch < 4; ch++) {
690 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
691 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
692 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
693 /* d = 1/s1 */
694 spe_frsqest(gen->f, d_reg, s1_reg);
695 spe_fi(gen->f, d_reg, s1_reg, d_reg);
696 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
697 free_itemps(gen);
698 }
699 }
700 return true;
701 }
702
703 /**
704 * Emit absolute value. See emit_ADD for comments.
705 */
706 static boolean
707 emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst)
708 {
709 int ch;
710 spe_comment(gen->f, -4, "ABS:");
711 for (ch = 0; ch < 4; ch++) {
712 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
713 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
714 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
715 const int bit31mask_reg = get_itemp(gen);
716
717 /* mask with bit 31 set, the rest cleared */
718 spe_load_uint(gen->f, bit31mask_reg, (1 << 31));
719
720 /* d = sign bit cleared in s1 */
721 spe_andc(gen->f, d_reg, s1_reg, bit31mask_reg);
722
723 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
724 free_itemps(gen);
725 }
726 }
727 return true;
728 }
729
730 /**
731 * Emit 3 component dot product. See emit_ADD for comments.
732 */
733 static boolean
734 emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst)
735 {
736 int ch;
737 int s1x_reg, s1y_reg, s1z_reg;
738 int s2x_reg, s2y_reg, s2z_reg;
739 int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
740
741 spe_comment(gen->f, -4, "DP3:");
742
743 s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
744 s2x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
745 s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
746 s2y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
747 s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
748 s2z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
749
750 /* t0 = x0 * x1 */
751 spe_fm(gen->f, t0_reg, s1x_reg, s2x_reg);
752
753 /* t1 = y0 * y1 */
754 spe_fm(gen->f, t1_reg, s1y_reg, s2y_reg);
755
756 /* t0 = z0 * z1 + t0 */
757 spe_fma(gen->f, t0_reg, s1z_reg, s2z_reg, t0_reg);
758
759 /* t0 = t0 + t1 */
760 spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
761
762 for (ch = 0; ch < 4; ch++) {
763 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
764 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
765 spe_move(gen->f, d_reg, t0_reg);
766 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
767 }
768 }
769
770 free_itemps(gen);
771 return true;
772 }
773
774 /**
775 * Emit 4 component dot product. See emit_ADD for comments.
776 */
777 static boolean
778 emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst)
779 {
780 int ch;
781 int s0x_reg, s0y_reg, s0z_reg, s0w_reg;
782 int s1x_reg, s1y_reg, s1z_reg, s1w_reg;
783 int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen);
784
785 spe_comment(gen->f, -4, "DP4:");
786
787 s0x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
788 s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
789 s0y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
790 s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
791 s0z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
792 s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
793 s0w_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[0]);
794 s1w_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]);
795
796 /* t0 = x0 * x1 */
797 spe_fm(gen->f, t0_reg, s0x_reg, s1x_reg);
798
799 /* t1 = y0 * y1 */
800 spe_fm(gen->f, t1_reg, s0y_reg, s1y_reg);
801
802 /* t0 = z0 * z1 + t0 */
803 spe_fma(gen->f, t0_reg, s0z_reg, s1z_reg, t0_reg);
804
805 /* t1 = w0 * w1 + t1 */
806 spe_fma(gen->f, t1_reg, s0w_reg, s1w_reg, t1_reg);
807
808 /* t0 = t0 + t1 */
809 spe_fa(gen->f, t0_reg, t0_reg, t1_reg);
810
811 for (ch = 0; ch < 4; ch++) {
812 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
813 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
814 spe_move(gen->f, d_reg, t0_reg);
815 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
816 }
817 }
818
819 free_itemps(gen);
820 return true;
821 }
822
823 /**
824 * Emit homogeneous dot product. See emit_ADD for comments.
825 */
826 static boolean
827 emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst)
828 {
829 /* XXX rewrite this function to look more like DP3/DP4 */
830 int ch;
831 spe_comment(gen->f, -4, "DPH:");
832
833 int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
834 int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
835 int tmp_reg = get_itemp(gen);
836
837 /* t = x0 * x1 */
838 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
839
840 s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
841 s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
842 /* t = y0 * y1 + t */
843 spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
844
845 s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
846 s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
847 /* t = z0 * z1 + t */
848 spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
849
850 s2_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]);
851 /* t = w1 + t */
852 spe_fa(gen->f, tmp_reg, s2_reg, tmp_reg);
853
854 for (ch = 0; ch < 4; ch++) {
855 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
856 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
857 spe_move(gen->f, d_reg, tmp_reg);
858 store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]);
859 }
860 }
861
862 free_itemps(gen);
863 return true;
864 }
865
866 /**
867 * Emit cross product. See emit_ADD for comments.
868 */
869 static boolean
870 emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst)
871 {
872 spe_comment(gen->f, -4, "XPD:");
873
874 int s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
875 int s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
876 int tmp_reg = get_itemp(gen);
877
878 /* t = z0 * y1 */
879 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
880
881 s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
882 s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
883 /* t = y0 * z1 - t */
884 spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
885
886 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_X)) {
887 store_dest_reg(gen, tmp_reg, CHAN_X, &inst->FullDstRegisters[0]);
888 }
889
890 s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
891 s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
892 /* t = x0 * z1 */
893 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
894
895 s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
896 s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
897 /* t = z0 * x1 - t */
898 spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
899
900 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Y)) {
901 store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->FullDstRegisters[0]);
902 }
903
904 s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
905 s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
906 /* t = y0 * x1 */
907 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
908
909 s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
910 s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
911 /* t = x0 * y1 - t */
912 spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
913
914 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Z)) {
915 store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->FullDstRegisters[0]);
916 }
917
918 free_itemps(gen);
919 return true;
920 }
921
922 /**
923 * Emit set-if-greater-than.
924 * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as
925 * the result but OpenGL/TGSI needs 0.0 and 1.0 results.
926 * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND.
927 */
928 static boolean
929 emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst)
930 {
931 int ch;
932
933 spe_comment(gen->f, -4, "SGT:");
934
935 for (ch = 0; ch < 4; ch++) {
936 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
937 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
938 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
939 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
940
941 /* d = (s1 > s2) */
942 spe_fcgt(gen->f, d_reg, s1_reg, s2_reg);
943
944 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
945 /* d = d & one_reg */
946 spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
947
948 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
949 free_itemps(gen);
950 }
951 }
952
953 return true;
954 }
955
956 /**
957 * Emit set-if_less-then. See emit_SGT for comments.
958 */
959 static boolean
960 emit_SLT(struct codegen *gen, const struct tgsi_full_instruction *inst)
961 {
962 int ch;
963
964 spe_comment(gen->f, -4, "SLT:");
965
966 for (ch = 0; ch < 4; ch++) {
967 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
968 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
969 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
970 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
971
972 /* d = (s1 < s2) */
973 spe_fcgt(gen->f, d_reg, s2_reg, s1_reg);
974
975 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
976 /* d = d & one_reg */
977 spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
978
979 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
980 free_itemps(gen);
981 }
982 }
983
984 return true;
985 }
986
987 /**
988 * Emit set-if_greater-then-or-equal. See emit_SGT for comments.
989 */
990 static boolean
991 emit_SGE(struct codegen *gen, const struct tgsi_full_instruction *inst)
992 {
993 int ch;
994
995 spe_comment(gen->f, -4, "SGE:");
996
997 for (ch = 0; ch < 4; ch++) {
998 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
999 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1000 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
1001 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1002
1003 /* d = (s1 >= s2) */
1004 spe_fcgt(gen->f, d_reg, s2_reg, s1_reg);
1005
1006 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
1007 /* d = ~d & one_reg */
1008 spe_andc(gen->f, d_reg, get_const_one_reg(gen), d_reg);
1009
1010 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1011 free_itemps(gen);
1012 }
1013 }
1014
1015 return true;
1016 }
1017
1018 /**
1019 * Emit set-if_less-then-or-equal. See emit_SGT for comments.
1020 */
1021 static boolean
1022 emit_SLE(struct codegen *gen, const struct tgsi_full_instruction *inst)
1023 {
1024 int ch;
1025
1026 spe_comment(gen->f, -4, "SLE:");
1027
1028 for (ch = 0; ch < 4; ch++) {
1029 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1030 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1031 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
1032 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1033
1034 /* d = (s1 <= s2) */
1035 spe_fcgt(gen->f, d_reg, s1_reg, s2_reg);
1036
1037 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
1038 /* d = ~d & one_reg */
1039 spe_andc(gen->f, d_reg, get_const_one_reg(gen), d_reg);
1040
1041 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1042 free_itemps(gen);
1043 }
1044 }
1045
1046 return true;
1047 }
1048
1049 /**
1050 * Emit set-if_equal. See emit_SGT for comments.
1051 */
1052 static boolean
1053 emit_SEQ(struct codegen *gen, const struct tgsi_full_instruction *inst)
1054 {
1055 int ch;
1056
1057 spe_comment(gen->f, -4, "SEQ:");
1058
1059 for (ch = 0; ch < 4; ch++) {
1060 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1061 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1062 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
1063 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1064
1065 /* d = (s1 == s2) */
1066 spe_fceq(gen->f, d_reg, s1_reg, s2_reg);
1067
1068 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
1069 /* d = d & one_reg */
1070 spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
1071
1072 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1073 free_itemps(gen);
1074 }
1075 }
1076
1077 return true;
1078 }
1079
1080 /**
1081 * Emit set-if_not_equal. See emit_SGT for comments.
1082 */
1083 static boolean
1084 emit_SNE(struct codegen *gen, const struct tgsi_full_instruction *inst)
1085 {
1086 int ch;
1087
1088 spe_comment(gen->f, -4, "SNE:");
1089
1090 for (ch = 0; ch < 4; ch++) {
1091 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1092 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1093 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
1094 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1095
1096 /* d = (s1 != s2) */
1097 spe_fceq(gen->f, d_reg, s1_reg, s2_reg);
1098 spe_nor(gen->f, d_reg, d_reg, d_reg);
1099
1100 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
1101 /* d = d & one_reg */
1102 spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
1103
1104 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1105 free_itemps(gen);
1106 }
1107 }
1108
1109 return true;
1110 }
1111
1112 /**
1113 * Emit compare. See emit_SGT for comments.
1114 */
1115 static boolean
1116 emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst)
1117 {
1118 int ch;
1119
1120 spe_comment(gen->f, -4, "CMP:");
1121
1122 for (ch = 0; ch < 4; ch++) {
1123 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1124 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1125 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
1126 int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
1127 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1128 int zero_reg = get_itemp(gen);
1129
1130 spe_xor(gen->f, zero_reg, zero_reg, zero_reg);
1131
1132 /* d = (s1 < 0) ? s2 : s3 */
1133 spe_fcgt(gen->f, d_reg, zero_reg, s1_reg);
1134 spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg);
1135
1136 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1137 free_itemps(gen);
1138 }
1139 }
1140
1141 return true;
1142 }
1143
1144 /**
1145 * Emit trunc.
1146 * Convert float to signed int
1147 * Convert signed int to float
1148 */
1149 static boolean
1150 emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst)
1151 {
1152 int ch;
1153
1154 spe_comment(gen->f, -4, "TRUNC:");
1155
1156 for (ch = 0; ch < 4; ch++) {
1157 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1158 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1159 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1160
1161 /* Convert float to int */
1162 spe_cflts(gen->f, d_reg, s1_reg, 0);
1163
1164 /* Convert int to float */
1165 spe_csflt(gen->f, d_reg, d_reg, 0);
1166
1167 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1168 free_itemps(gen);
1169 }
1170 }
1171
1172 return true;
1173 }
1174
1175 /**
1176 * Emit floor.
1177 * If negative int subtract one
1178 * Convert float to signed int
1179 * Convert signed int to float
1180 */
1181 static boolean
1182 emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst)
1183 {
1184 int ch;
1185
1186 spe_comment(gen->f, -4, "FLR:");
1187
1188 int zero_reg = get_itemp(gen);
1189 spe_xor(gen->f, zero_reg, zero_reg, zero_reg);
1190
1191 for (ch = 0; ch < 4; ch++) {
1192 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1193 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1194 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1195 int tmp_reg = get_itemp(gen);
1196
1197 /* If negative, subtract 1.0 */
1198 spe_fcgt(gen->f, tmp_reg, zero_reg, s1_reg);
1199 spe_selb(gen->f, tmp_reg, zero_reg, get_const_one_reg(gen), tmp_reg);
1200 spe_fs(gen->f, tmp_reg, s1_reg, tmp_reg);
1201
1202 /* Convert float to int */
1203 spe_cflts(gen->f, tmp_reg, tmp_reg, 0);
1204
1205 /* Convert int to float */
1206 spe_csflt(gen->f, d_reg, tmp_reg, 0);
1207
1208 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1209 free_itemps(gen);
1210 }
1211 }
1212
1213 return true;
1214 }
1215
1216 /**
1217 * Compute frac = Input - FLR(Input)
1218 */
1219 static boolean
1220 emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst)
1221 {
1222 int ch;
1223
1224 spe_comment(gen->f, -4, "FRC:");
1225
1226 int zero_reg = get_itemp(gen);
1227 spe_xor(gen->f, zero_reg, zero_reg, zero_reg);
1228
1229 for (ch = 0; ch < 4; ch++) {
1230 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1231 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1232 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1233 int tmp_reg = get_itemp(gen);
1234
1235 /* If negative, subtract 1.0 */
1236 spe_fcgt(gen->f, tmp_reg, zero_reg, s1_reg);
1237 spe_selb(gen->f, tmp_reg, zero_reg, get_const_one_reg(gen), tmp_reg);
1238 spe_fs(gen->f, tmp_reg, s1_reg, tmp_reg);
1239
1240 /* Convert float to int */
1241 spe_cflts(gen->f, tmp_reg, tmp_reg, 0);
1242
1243 /* Convert int to float */
1244 spe_csflt(gen->f, tmp_reg, tmp_reg, 0);
1245
1246 /* d = s1 - FLR(s1) */
1247 spe_fs(gen->f, d_reg, s1_reg, tmp_reg);
1248
1249 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1250 free_itemps(gen);
1251 }
1252 }
1253
1254 return true;
1255 }
1256
1257
#if 0
/**
 * Debug helper (compiled out): print index, name and address of every
 * entry in the context's SPU function table.
 */
static void
print_functions(struct cell_context *cell)
{
   struct cell_spu_function_info *table = &cell->spu_functions;
   uint idx = 0;

   while (idx < table->num) {
      printf("SPU func %u: %s at %u\n",
             idx, table->names[idx], table->addrs[idx]);
      idx++;
   }
}
#endif
1270
1271
1272 static uint
1273 lookup_function(struct cell_context *cell, const char *funcname)
1274 {
1275 const struct cell_spu_function_info *funcs = &cell->spu_functions;
1276 uint i, addr = 0;
1277 for (i = 0; i < funcs->num; i++) {
1278 if (strcmp(funcs->names[i], funcname) == 0) {
1279 addr = funcs->addrs[i];
1280 }
1281 }
1282 assert(addr && "spu function not found");
1283 return addr / 4; /* discard 2 least significant bits */
1284 }
1285
1286
1287 /**
1288 * Emit code to call a SPU function.
1289 * Used to implement instructions like SIN/COS/POW/TEX/etc.
1290 */
1291 static boolean
1292 emit_function_call(struct codegen *gen,
1293 const struct tgsi_full_instruction *inst,
1294 char *funcname, uint num_args)
1295 {
1296 const uint addr = lookup_function(gen->cell, funcname);
1297 char comment[100];
1298 int ch;
1299
1300 assert(num_args <= 3);
1301
1302 snprintf(comment, sizeof(comment), "CALL %s:", funcname);
1303 spe_comment(gen->f, -4, comment);
1304
1305 for (ch = 0; ch < 4; ch++) {
1306 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1307 int s_regs[3], d_reg;
1308 ubyte usedRegs[SPE_NUM_REGS];
1309 uint a, i, numUsed;
1310
1311 for (a = 0; a < num_args; a++) {
1312 s_regs[a] = get_src_reg(gen, ch, &inst->FullSrcRegisters[a]);
1313 }
1314 d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1315
1316 numUsed = spe_get_registers_used(gen->f, usedRegs);
1317 assert(numUsed < gen->frame_size / 16 - 2);
1318
1319 /* save registers to stack */
1320 for (i = 0; i < numUsed; i++) {
1321 uint reg = usedRegs[i];
1322 int offset = 2 + i;
1323 spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset);
1324 }
1325
1326 /* setup function arguments */
1327 for (a = 0; a < num_args; a++) {
1328 spe_move(gen->f, 3 + a, s_regs[a]);
1329 }
1330
1331 /* branch to function, save return addr */
1332 spe_brasl(gen->f, SPE_REG_RA, addr);
1333
1334 /* save function's return value */
1335 spe_move(gen->f, d_reg, 3);
1336
1337 /* restore registers from stack */
1338 for (i = 0; i < numUsed; i++) {
1339 uint reg = usedRegs[i];
1340 if (reg != d_reg) {
1341 int offset = 2 + i;
1342 spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset);
1343 }
1344 }
1345
1346 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1347 free_itemps(gen);
1348 }
1349 }
1350
1351 return true;
1352 }
1353
1354
1355 static boolean
1356 emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst)
1357 {
1358 const uint target = inst->InstructionExtTexture.Texture;
1359 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
1360 uint addr;
1361 int ch;
1362 int coord_regs[4], d_regs[4];
1363
1364 switch (target) {
1365 case TGSI_TEXTURE_1D:
1366 case TGSI_TEXTURE_2D:
1367 addr = lookup_function(gen->cell, "spu_tex_2d");
1368 break;
1369 case TGSI_TEXTURE_3D:
1370 addr = lookup_function(gen->cell, "spu_tex_3d");
1371 break;
1372 case TGSI_TEXTURE_CUBE:
1373 addr = lookup_function(gen->cell, "spu_tex_cube");
1374 break;
1375 default:
1376 ASSERT(0 && "unsupported texture target");
1377 return FALSE;
1378 }
1379
1380 assert(inst->FullSrcRegisters[1].SrcRegister.File == TGSI_FILE_SAMPLER);
1381
1382 spe_comment(gen->f, -4, "CALL tex:");
1383
1384 /* get src/dst reg info */
1385 for (ch = 0; ch < 4; ch++) {
1386 coord_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1387 d_regs[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1388 }
1389
1390 {
1391 ubyte usedRegs[SPE_NUM_REGS];
1392 uint i, numUsed;
1393
1394 numUsed = spe_get_registers_used(gen->f, usedRegs);
1395 assert(numUsed < gen->frame_size / 16 - 2);
1396
1397 /* save registers to stack */
1398 for (i = 0; i < numUsed; i++) {
1399 uint reg = usedRegs[i];
1400 int offset = 2 + i;
1401 spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset);
1402 }
1403
1404 /* setup function arguments (XXX depends on target) */
1405 for (i = 0; i < 4; i++) {
1406 spe_move(gen->f, 3 + i, coord_regs[i]);
1407 }
1408 spe_load_uint(gen->f, 7, unit); /* sampler unit */
1409
1410 /* branch to function, save return addr */
1411 spe_brasl(gen->f, SPE_REG_RA, addr);
1412
1413 /* save function's return values (four pixel's colors) */
1414 for (i = 0; i < 4; i++) {
1415 spe_move(gen->f, d_regs[i], 3 + i);
1416 }
1417
1418 /* restore registers from stack */
1419 for (i = 0; i < numUsed; i++) {
1420 uint reg = usedRegs[i];
1421 if (reg != d_regs[0] &&
1422 reg != d_regs[1] &&
1423 reg != d_regs[2] &&
1424 reg != d_regs[3]) {
1425 int offset = 2 + i;
1426 spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset);
1427 }
1428 }
1429 }
1430
1431 for (ch = 0; ch < 4; ch++) {
1432 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1433 store_dest_reg(gen, d_regs[ch], ch, &inst->FullDstRegisters[0]);
1434 free_itemps(gen);
1435 }
1436 }
1437
1438 return TRUE;
1439 }
1440
1441
1442 /**
1443 * KILL if any of src reg values are less than zero.
1444 */
1445 static boolean
1446 emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst)
1447 {
1448 int ch;
1449 int s_regs[4], kil_reg = -1, cmp_reg, zero_reg;
1450
1451 spe_comment(gen->f, -4, "CALL kil:");
1452
1453 /* zero = {0,0,0,0} */
1454 zero_reg = get_itemp(gen);
1455 spe_load_uint(gen->f, zero_reg, 0);
1456
1457 cmp_reg = get_itemp(gen);
1458
1459 /* get src regs */
1460 for (ch = 0; ch < 4; ch++) {
1461 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1462 s_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1463 }
1464 }
1465
1466 /* test if any src regs are < 0 */
1467 for (ch = 0; ch < 4; ch++) {
1468 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1469 if (kil_reg >= 0) {
1470 /* cmp = 0 > src ? : ~0 : 0 */
1471 spe_fcgt(gen->f, cmp_reg, zero_reg, s_regs[ch]);
1472 /* kil = kil | cmp */
1473 spe_or(gen->f, kil_reg, kil_reg, cmp_reg);
1474 }
1475 else {
1476 kil_reg = get_itemp(gen);
1477 /* kil = 0 > src ? : ~0 : 0 */
1478 spe_fcgt(gen->f, kil_reg, zero_reg, s_regs[ch]);
1479 }
1480 }
1481 }
1482
1483 if (gen->if_nesting) {
1484 /* may have been a conditional kil */
1485 spe_and(gen->f, kil_reg, kil_reg, gen->exec_mask_reg);
1486 }
1487
1488 /* allocate the kill mask reg if needed */
1489 if (gen->kill_mask_reg <= 0) {
1490 gen->kill_mask_reg = spe_allocate_available_register(gen->f);
1491 spe_move(gen->f, gen->kill_mask_reg, kil_reg);
1492 }
1493 else {
1494 spe_or(gen->f, gen->kill_mask_reg, gen->kill_mask_reg, kil_reg);
1495 }
1496
1497 free_itemps(gen);
1498
1499 return TRUE;
1500 }
1501
1502
1503
1504 /**
1505 * Emit max. See emit_SGT for comments.
1506 */
1507 static boolean
1508 emit_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst)
1509 {
1510 int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4];
1511
1512 spe_comment(gen->f, -4, "MAX:");
1513
1514 for (ch = 0; ch < 4; ch++) {
1515 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1516 s0_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1517 s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
1518 d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1519 tmp_reg[ch] = get_itemp(gen);
1520 }
1521 }
1522
1523 /* d = (s0 > s1) ? s0 : s1 */
1524 for (ch = 0; ch < 4; ch++) {
1525 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1526 spe_fcgt(gen->f, tmp_reg[ch], s0_reg[ch], s1_reg[ch]);
1527 }
1528 }
1529 for (ch = 0; ch < 4; ch++) {
1530 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1531 spe_selb(gen->f, d_reg[ch], s1_reg[ch], s0_reg[ch], tmp_reg[ch]);
1532 }
1533 }
1534
1535 for (ch = 0; ch < 4; ch++) {
1536 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1537 store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
1538 }
1539 }
1540
1541 free_itemps(gen);
1542 return true;
1543 }
1544
1545 /**
1546 * Emit max. See emit_SGT for comments.
1547 */
1548 static boolean
1549 emit_MIN(struct codegen *gen, const struct tgsi_full_instruction *inst)
1550 {
1551 int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4];
1552
1553 spe_comment(gen->f, -4, "MIN:");
1554
1555 for (ch = 0; ch < 4; ch++) {
1556 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1557 s0_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1558 s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
1559 d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1560 tmp_reg[ch] = get_itemp(gen);
1561 }
1562 }
1563
1564 /* d = (s1 > s0) ? s0 : s1 */
1565 for (ch = 0; ch < 4; ch++) {
1566 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1567 spe_fcgt(gen->f, tmp_reg[ch], s1_reg[ch], s0_reg[ch]);
1568 }
1569 }
1570 for (ch = 0; ch < 4; ch++) {
1571 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1572 spe_selb(gen->f, d_reg[ch], s1_reg[ch], s0_reg[ch], tmp_reg[ch]);
1573 }
1574 }
1575
1576 for (ch = 0; ch < 4; ch++) {
1577 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1578 store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]);
1579 }
1580 }
1581
1582 free_itemps(gen);
1583 return true;
1584 }
1585
1586 static boolean
1587 emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst)
1588 {
1589 const int channel = 0;
1590 const int exec_reg = get_exec_mask_reg(gen);
1591
1592 spe_comment(gen->f, -4, "IF:");
1593
1594 /* update execution mask with the predicate register */
1595 int tmp_reg = get_itemp(gen);
1596 int s1_reg = get_src_reg(gen, channel, &inst->FullSrcRegisters[0]);
1597
1598 /* tmp = (s1_reg == 0) */
1599 spe_ceqi(gen->f, tmp_reg, s1_reg, 0);
1600 /* tmp = !tmp */
1601 spe_complement(gen->f, tmp_reg, tmp_reg);
1602 /* exec_mask = exec_mask & tmp */
1603 spe_and(gen->f, exec_reg, exec_reg, tmp_reg);
1604
1605 gen->if_nesting++;
1606
1607 free_itemps(gen);
1608
1609 return true;
1610 }
1611
1612
1613 static boolean
1614 emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst)
1615 {
1616 const int exec_reg = get_exec_mask_reg(gen);
1617
1618 spe_comment(gen->f, -4, "ELSE:");
1619
1620 /* exec_mask = !exec_mask */
1621 spe_complement(gen->f, exec_reg, exec_reg);
1622
1623 return true;
1624 }
1625
1626
1627 static boolean
1628 emit_ENDIF(struct codegen *gen, const struct tgsi_full_instruction *inst)
1629 {
1630 const int exec_reg = get_exec_mask_reg(gen);
1631
1632 spe_comment(gen->f, -4, "ENDIF:");
1633
1634 /* XXX todo: pop execution mask */
1635
1636 spe_load_int(gen->f, exec_reg, ~0x0);
1637
1638 gen->if_nesting--;
1639 return true;
1640 }
1641
1642
1643 static boolean
1644 emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst,
1645 boolean ddx)
1646 {
1647 int ch;
1648
1649 spe_comment(gen->f, -4, ddx ? "DDX:" : "DDY:");
1650
1651 for (ch = 0; ch < 4; ch++) {
1652 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1653 int s_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1654 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1655
1656 int t1_reg = get_itemp(gen);
1657 int t2_reg = get_itemp(gen);
1658
1659 spe_splat_word(gen->f, t1_reg, s_reg, 0); /* upper-left pixel */
1660 if (ddx) {
1661 spe_splat_word(gen->f, t2_reg, s_reg, 1); /* upper-right pixel */
1662 }
1663 else {
1664 spe_splat_word(gen->f, t2_reg, s_reg, 2); /* lower-left pixel */
1665 }
1666 spe_fs(gen->f, d_reg, t2_reg, t1_reg);
1667
1668 free_itemps(gen);
1669 }
1670 }
1671
1672 return true;
1673 }
1674
1675
1676
1677
1678 /**
1679 * Emit END instruction.
1680 * We just return from the shader function at this point.
1681 *
1682 * Note that there may be more code after this that would be
1683 * called by TGSI_OPCODE_CALL.
1684 */
1685 static boolean
1686 emit_END(struct codegen *gen)
1687 {
1688 spe_comment(gen->f, -4, "END:");
1689 emit_epilogue(gen);
1690 return true;
1691 }
1692
1693
1694 /**
1695 * Emit code for the given instruction. Just a big switch stmt.
1696 */
1697 static boolean
1698 emit_instruction(struct codegen *gen,
1699 const struct tgsi_full_instruction *inst)
1700 {
1701 switch (inst->Instruction.Opcode) {
1702 case TGSI_OPCODE_MOV:
1703 case TGSI_OPCODE_SWZ:
1704 return emit_MOV(gen, inst);
1705 case TGSI_OPCODE_MUL:
1706 return emit_MUL(gen, inst);
1707 case TGSI_OPCODE_ADD:
1708 return emit_ADD(gen, inst);
1709 case TGSI_OPCODE_SUB:
1710 return emit_SUB(gen, inst);
1711 case TGSI_OPCODE_MAD:
1712 return emit_MAD(gen, inst);
1713 case TGSI_OPCODE_LERP:
1714 return emit_LERP(gen, inst);
1715 case TGSI_OPCODE_DP3:
1716 return emit_DP3(gen, inst);
1717 case TGSI_OPCODE_DP4:
1718 return emit_DP4(gen, inst);
1719 case TGSI_OPCODE_DPH:
1720 return emit_DPH(gen, inst);
1721 case TGSI_OPCODE_XPD:
1722 return emit_XPD(gen, inst);
1723 case TGSI_OPCODE_RCP:
1724 return emit_RCP(gen, inst);
1725 case TGSI_OPCODE_RSQ:
1726 return emit_RSQ(gen, inst);
1727 case TGSI_OPCODE_ABS:
1728 return emit_ABS(gen, inst);
1729 case TGSI_OPCODE_SGT:
1730 return emit_SGT(gen, inst);
1731 case TGSI_OPCODE_SLT:
1732 return emit_SLT(gen, inst);
1733 case TGSI_OPCODE_SGE:
1734 return emit_SGE(gen, inst);
1735 case TGSI_OPCODE_SLE:
1736 return emit_SLE(gen, inst);
1737 case TGSI_OPCODE_SEQ:
1738 return emit_SEQ(gen, inst);
1739 case TGSI_OPCODE_SNE:
1740 return emit_SNE(gen, inst);
1741 case TGSI_OPCODE_CMP:
1742 return emit_CMP(gen, inst);
1743 case TGSI_OPCODE_MAX:
1744 return emit_MAX(gen, inst);
1745 case TGSI_OPCODE_MIN:
1746 return emit_MIN(gen, inst);
1747 case TGSI_OPCODE_TRUNC:
1748 return emit_TRUNC(gen, inst);
1749 case TGSI_OPCODE_FLR:
1750 return emit_FLR(gen, inst);
1751 case TGSI_OPCODE_FRC:
1752 return emit_FRC(gen, inst);
1753 case TGSI_OPCODE_END:
1754 return emit_END(gen);
1755
1756 case TGSI_OPCODE_COS:
1757 return emit_function_call(gen, inst, "spu_cos", 1);
1758 case TGSI_OPCODE_SIN:
1759 return emit_function_call(gen, inst, "spu_sin", 1);
1760 case TGSI_OPCODE_POW:
1761 return emit_function_call(gen, inst, "spu_pow", 2);
1762 case TGSI_OPCODE_EXPBASE2:
1763 return emit_function_call(gen, inst, "spu_exp2", 1);
1764 case TGSI_OPCODE_LOGBASE2:
1765 return emit_function_call(gen, inst, "spu_log2", 1);
1766 case TGSI_OPCODE_TEX:
1767 /* fall-through for now */
1768 case TGSI_OPCODE_TXD:
1769 /* fall-through for now */
1770 case TGSI_OPCODE_TXB:
1771 /* fall-through for now */
1772 case TGSI_OPCODE_TXL:
1773 /* fall-through for now */
1774 case TGSI_OPCODE_TXP:
1775 return emit_TEX(gen, inst);
1776 case TGSI_OPCODE_KIL:
1777 return emit_KIL(gen, inst);
1778
1779 case TGSI_OPCODE_IF:
1780 return emit_IF(gen, inst);
1781 case TGSI_OPCODE_ELSE:
1782 return emit_ELSE(gen, inst);
1783 case TGSI_OPCODE_ENDIF:
1784 return emit_ENDIF(gen, inst);
1785
1786 case TGSI_OPCODE_DDX:
1787 return emit_DDX_DDY(gen, inst, true);
1788 case TGSI_OPCODE_DDY:
1789 return emit_DDX_DDY(gen, inst, false);
1790
1791 /* XXX lots more cases to do... */
1792
1793 default:
1794 fprintf(stderr, "Cell: unimplemented TGSI instruction %d!\n",
1795 inst->Instruction.Opcode);
1796 return false;
1797 }
1798
1799 return true;
1800 }
1801
1802
1803
1804 /**
1805 * Emit code for a TGSI immediate value (vector of four floats).
1806 * This involves register allocation and initialization.
1807 * XXX the initialization should be done by a "prepare" stage, not
1808 * per quad execution!
1809 */
1810 static boolean
1811 emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed)
1812 {
1813 int ch;
1814
1815 assert(gen->num_imm < MAX_TEMPS);
1816
1817 spe_comment(gen->f, -4, "IMMEDIATE:");
1818
1819 for (ch = 0; ch < 4; ch++) {
1820 float val = immed->u.ImmediateFloat32[ch].Float;
1821
1822 if (ch > 0 && val == immed->u.ImmediateFloat32[ch - 1].Float) {
1823 /* re-use previous register */
1824 gen->imm_regs[gen->num_imm][ch] = gen->imm_regs[gen->num_imm][ch - 1];
1825 }
1826 else {
1827 int reg = spe_allocate_available_register(gen->f);
1828
1829 if (reg < 0)
1830 return false;
1831
1832 /* update immediate map */
1833 gen->imm_regs[gen->num_imm][ch] = reg;
1834
1835 /* emit initializer instruction */
1836 spe_load_float(gen->f, reg, val);
1837 }
1838 }
1839
1840 gen->num_imm++;
1841
1842 return true;
1843 }
1844
1845
1846
1847 /**
1848 * Emit "code" for a TGSI declaration.
1849 * We only care about TGSI TEMPORARY register declarations at this time.
1850 * For each TGSI TEMPORARY we allocate four SPE registers.
1851 */
1852 static boolean
1853 emit_declaration(struct cell_context *cell,
1854 struct codegen *gen, const struct tgsi_full_declaration *decl)
1855 {
1856 int i, ch;
1857
1858 switch (decl->Declaration.File) {
1859 case TGSI_FILE_TEMPORARY:
1860 for (i = decl->DeclarationRange.First;
1861 i <= decl->DeclarationRange.Last;
1862 i++) {
1863 assert(i < MAX_TEMPS);
1864 for (ch = 0; ch < 4; ch++) {
1865 gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f);
1866 if (gen->temp_regs[i][ch] < 0)
1867 return false; /* out of regs */
1868 }
1869
1870 /* XXX if we run out of SPE registers, we need to spill
1871 * to SPU memory. someday...
1872 */
1873
1874 {
1875 char buf[100];
1876 sprintf(buf, "TGSI temp[%d] maps to SPU regs [$%d $%d $%d $%d]", i,
1877 gen->temp_regs[i][0], gen->temp_regs[i][1],
1878 gen->temp_regs[i][2], gen->temp_regs[i][3]);
1879 spe_comment(gen->f, -4, buf);
1880 }
1881 }
1882 break;
1883 default:
1884 ; /* ignore */
1885 }
1886
1887 return true;
1888 }
1889
1890
1891
1892 /**
1893 * Translate TGSI shader code to SPE instructions. This is done when
1894 * the state tracker gives us a new shader (via pipe->create_fs_state()).
1895 *
1896 * \param cell the rendering context (in)
1897 * \param tokens the TGSI shader (in)
1898 * \param f the generated function (out)
1899 */
1900 boolean
1901 cell_gen_fragment_program(struct cell_context *cell,
1902 const struct tgsi_token *tokens,
1903 struct spe_function *f)
1904 {
1905 struct tgsi_parse_context parse;
1906 struct codegen gen;
1907
1908 memset(&gen, 0, sizeof(gen));
1909 gen.cell = cell;
1910 gen.f = f;
1911
1912 /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
1913 gen.inputs_reg = 3; /* pointer to inputs array */
1914 gen.outputs_reg = 4; /* pointer to outputs array */
1915 gen.constants_reg = 5; /* pointer to constants array */
1916
1917 spe_init_func(f, SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE);
1918 spe_allocate_register(f, gen.inputs_reg);
1919 spe_allocate_register(f, gen.outputs_reg);
1920 spe_allocate_register(f, gen.constants_reg);
1921
1922 if (cell->debug_flags & CELL_DEBUG_ASM) {
1923 spe_print_code(f, true);
1924 spe_indent(f, 8);
1925 printf("Begin %s\n", __FUNCTION__);
1926 tgsi_dump(tokens, 0);
1927 }
1928
1929 tgsi_parse_init(&parse, tokens);
1930
1931 emit_prologue(&gen);
1932
1933 while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) {
1934 tgsi_parse_token(&parse);
1935
1936 switch (parse.FullToken.Token.Type) {
1937 case TGSI_TOKEN_TYPE_IMMEDIATE:
1938 if (!emit_immediate(&gen, &parse.FullToken.FullImmediate))
1939 gen.error = true;
1940 break;
1941
1942 case TGSI_TOKEN_TYPE_DECLARATION:
1943 if (!emit_declaration(cell, &gen, &parse.FullToken.FullDeclaration))
1944 gen.error = true;
1945 break;
1946
1947 case TGSI_TOKEN_TYPE_INSTRUCTION:
1948 if (!emit_instruction(&gen, &parse.FullToken.FullInstruction))
1949 gen.error = true;
1950 break;
1951
1952 default:
1953 assert(0);
1954 }
1955 }
1956
1957 if (gen.error) {
1958 /* terminate the SPE code */
1959 return emit_END(&gen);
1960 }
1961
1962 if (cell->debug_flags & CELL_DEBUG_ASM) {
1963 printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst);
1964 printf("End %s\n", __FUNCTION__);
1965 }
1966
1967 tgsi_parse_free( &parse );
1968
1969 return !gen.error;
1970 }