cell: Fixed bug with absolute, negate, set-negative logic in source fetch for TGSI...
[mesa.git] / src / gallium / drivers / cell / ppu / cell_gen_fp.c
1 /**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29
30 /**
31 * Generate SPU fragment program/shader code.
32 *
33 * Note that we generate SOA-style code here. So each TGSI instruction
34 * operates on four pixels (and is translated into four SPU instructions,
35 * generally speaking).
36 *
37 * \author Brian Paul
38 */
39
40
41 #include "pipe/p_defines.h"
42 #include "pipe/p_state.h"
43 #include "pipe/p_shader_tokens.h"
44 #include "tgsi/tgsi_parse.h"
45 #include "tgsi/tgsi_util.h"
46 #include "tgsi/tgsi_exec.h"
47 #include "tgsi/tgsi_dump.h"
48 #include "rtasm/rtasm_ppc_spe.h"
49 #include "util/u_memory.h"
50 #include "cell_context.h"
51 #include "cell_gen_fp.h"
52
53
54 #define MAX_TEMPS 16
55 #define MAX_IMMED 8
56
57 #define CHAN_X 0
58 #define CHAN_Y 1
59 #define CHAN_Z 2
60 #define CHAN_W 3
61
/**
 * Context needed during code generation.
 *
 * A pointer to this structure is passed to every helper and emit_*()
 * function in this file; it holds the register assignments and the
 * bookkeeping those functions share.
 */
struct codegen
{
   int inputs_reg;      /**< 1st function parameter */
   int outputs_reg;     /**< 2nd function parameter */
   int constants_reg;   /**< 3rd function parameter */
   int temp_regs[MAX_TEMPS][4]; /**< maps TGSI temps to SPE registers */
   int imm_regs[MAX_IMMED][4];  /**< maps TGSI immediates to SPE registers */

   int num_imm;  /**< number of immediates */

   /**
    * Register containing {1.0, 1.0, 1.0, 1.0}.  Allocated on demand by
    * get_const_one_reg(); a value <= 0 is treated as "not allocated yet".
    */
   int one_reg;

   /**
    * Per-instruction temps / intermediate temps.
    * get_itemp() appends to itemps[]; free_itemps() releases every entry
    * and resets num_itemps to zero.
    */
   int num_itemps;
   int itemps[10];

   /**
    * Current IF/ELSE/ENDIF nesting level.  get_dst_reg() and
    * store_dest_reg() switch to exec-mask-controlled writes when this
    * is greater than zero.
    */
   int if_nesting;
   /**
    * Index of execution mask register.  Allocated on demand by
    * get_exec_mask_reg(); a value <= 0 is treated as "not allocated yet".
    */
   int exec_mask_reg;

   struct spe_function *f;  /**< SPE function that instructions are emitted into */
   boolean error;           /**< presumably set when code generation fails -- TODO confirm against the users of this field */
};
89
90
91 /**
92 * Allocate an intermediate temporary register.
93 */
94 static int
95 get_itemp(struct codegen *gen)
96 {
97 int t = spe_allocate_available_register(gen->f);
98 assert(gen->num_itemps < Elements(gen->itemps));
99 gen->itemps[gen->num_itemps++] = t;
100 return t;
101 }
102
103 /**
104 * Free all intermediate temporary registers. To be called after each
105 * instruction has been emitted.
106 */
107 static void
108 free_itemps(struct codegen *gen)
109 {
110 int i;
111 for (i = 0; i < gen->num_itemps; i++) {
112 spe_release_register(gen->f, gen->itemps[i]);
113 }
114 gen->num_itemps = 0;
115 }
116
117
118 /**
119 * Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}.
120 * The register is allocated and initialized upon the first call.
121 */
122 static int
123 get_const_one_reg(struct codegen *gen)
124 {
125 if (gen->one_reg <= 0) {
126 gen->one_reg = spe_allocate_available_register(gen->f);
127
128 spe_indent(gen->f, 4);
129 spe_comment(gen->f, -4, "INIT CONSTANT 1.0:");
130
131 /* one = {1.0, 1.0, 1.0, 1.0} */
132 spe_load_float(gen->f, gen->one_reg, 1.0f);
133
134 spe_indent(gen->f, -4);
135 }
136
137 return gen->one_reg;
138 }
139
140
141 /**
142 * Return index of the pixel execution mask.
143 * The register is allocated an initialized upon the first call.
144 *
145 * The pixel execution mask controls which pixels in a quad are
146 * modified, according to surrounding conditionals, loops, etc.
147 */
148 static int
149 get_exec_mask_reg(struct codegen *gen)
150 {
151 if (gen->exec_mask_reg <= 0) {
152 gen->exec_mask_reg = spe_allocate_available_register(gen->f);
153
154 spe_indent(gen->f, 4);
155 spe_comment(gen->f, -4, "INIT EXEC MASK = ~0:");
156
157 /* exec_mask = {~0, ~0, ~0, ~0} */
158 spe_load_int(gen->f, gen->exec_mask_reg, ~0);
159
160 spe_indent(gen->f, -4);
161 }
162
163 return gen->exec_mask_reg;
164 }
165
166
167 /**
168 * Return the index of the SPU temporary containing the named TGSI
169 * source register. If the TGSI register is a TGSI_FILE_TEMPORARY we
170 * just return the corresponding SPE register. If the TGIS register
171 * is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register
172 * and emit an SPE load instruction.
173 */
174 static int
175 get_src_reg(struct codegen *gen,
176 int channel,
177 const struct tgsi_full_src_register *src)
178 {
179 int reg = -1;
180 int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel);
181 boolean reg_is_itemp = FALSE;
182 uint sign_op;
183
184 assert(swizzle >= TGSI_SWIZZLE_X);
185 assert(swizzle <= TGSI_EXTSWIZZLE_ONE);
186
187 switch (src->SrcRegister.File) {
188 case TGSI_FILE_TEMPORARY:
189 reg = gen->temp_regs[src->SrcRegister.Index][swizzle];
190 break;
191 case TGSI_FILE_INPUT:
192 {
193 if(swizzle == TGSI_EXTSWIZZLE_ONE)
194 {
195 /* Load const one float and early out */
196 reg = get_const_one_reg(gen);
197 }
198 else if(swizzle == TGSI_EXTSWIZZLE_ZERO)
199 {
200 /* Load const zero float and early out */
201 reg = get_itemp(gen);
202 spe_xor(gen->f, reg, reg, reg);
203 }
204 else
205 {
206 /* offset is measured in quadwords, not bytes */
207 int offset = src->SrcRegister.Index * 4 + swizzle;
208 reg = get_itemp(gen);
209 reg_is_itemp = TRUE;
210 /* Load: reg = memory[(machine_reg) + offset] */
211 spe_lqd(gen->f, reg, gen->inputs_reg, offset);
212 }
213 }
214 break;
215 case TGSI_FILE_IMMEDIATE:
216 reg = gen->imm_regs[src->SrcRegister.Index][swizzle];
217 break;
218 case TGSI_FILE_CONSTANT:
219 /* xxx fall-through for now / fix */
220 default:
221 assert(0);
222 }
223
224 /*
225 * Handle absolute value, negate or set-negative of src register.
226 */
227 sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel);
228 if (sign_op != TGSI_UTIL_SIGN_KEEP) {
229 /*
230 * All sign ops are done by manipulating bit 31, the IEEE float sign bit.
231 */
232 const int bit31mask_reg = get_itemp(gen);
233 int result_reg;
234
235 if (reg_is_itemp) {
236 /* re-use 'reg' for the result */
237 result_reg = reg;
238 }
239 else {
240 /* alloc a new reg for the result */
241 result_reg = get_itemp(gen);
242 }
243
244 /* mask with bit 31 set, the rest cleared */
245 spe_load_int(gen->f, bit31mask_reg, (1 << 31));
246
247 if (sign_op == TGSI_UTIL_SIGN_CLEAR) {
248 spe_andc(gen->f, result_reg, reg, bit31mask_reg);
249 }
250 else if (sign_op == TGSI_UTIL_SIGN_SET) {
251 spe_and(gen->f, result_reg, reg, bit31mask_reg);
252 }
253 else {
254 assert(sign_op == TGSI_UTIL_SIGN_TOGGLE);
255 spe_xor(gen->f, result_reg, reg, bit31mask_reg);
256 }
257
258 reg = result_reg;
259 }
260
261 return reg;
262 }
263
264
265 /**
266 * Return the index of an SPE register to use for the given TGSI register.
267 * If the TGSI register is TGSI_FILE_TEMPORARAY, the index of the
268 * corresponding SPE register is returned. If the TGSI register is
269 * TGSI_FILE_OUTPUT we allocate an intermediate temporary register.
270 * See store_dest_reg() below...
271 */
272 static int
273 get_dst_reg(struct codegen *gen,
274 int channel,
275 const struct tgsi_full_dst_register *dest)
276 {
277 int reg = -1;
278
279 switch (dest->DstRegister.File) {
280 case TGSI_FILE_TEMPORARY:
281 if (gen->if_nesting > 0)
282 reg = get_itemp(gen);
283 else
284 reg = gen->temp_regs[dest->DstRegister.Index][channel];
285 break;
286 case TGSI_FILE_OUTPUT:
287 reg = get_itemp(gen);
288 break;
289 default:
290 assert(0);
291 }
292
293 return reg;
294 }
295
296
297 /**
298 * When a TGSI instruction is writing to an output register, this
299 * function emits the SPE store instruction to store the value_reg.
300 * \param value_reg the SPE register containing the value to store.
301 * This would have been returned by get_dst_reg().
302 */
303 static void
304 store_dest_reg(struct codegen *gen,
305 int value_reg, int channel,
306 const struct tgsi_full_dst_register *dest)
307 {
308 switch (dest->DstRegister.File) {
309 case TGSI_FILE_TEMPORARY:
310 if (gen->if_nesting > 0) {
311 int d_reg = gen->temp_regs[dest->DstRegister.Index][channel];
312 int exec_reg = get_exec_mask_reg(gen);
313 /* Mix d with new value according to exec mask:
314 * d[i] = mask_reg[i] ? value_reg : d_reg
315 */
316 spe_selb(gen->f, d_reg, d_reg, value_reg, exec_reg);
317 }
318 else {
319 /* we're not inside a condition or loop: do nothing special */
320 }
321 break;
322 case TGSI_FILE_OUTPUT:
323 {
324 /* offset is measured in quadwords, not bytes */
325 int offset = dest->DstRegister.Index * 4 + channel;
326 if (gen->if_nesting > 0) {
327 int exec_reg = get_exec_mask_reg(gen);
328 int curval_reg = get_itemp(gen);
329 /* First read the current value from memory:
330 * Load: curval = memory[(machine_reg) + offset]
331 */
332 spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset);
333 /* Mix curval with newvalue according to exec mask:
334 * d[i] = mask_reg[i] ? value_reg : d_reg
335 */
336 spe_selb(gen->f, curval_reg, curval_reg, value_reg, exec_reg);
337 /* Store: memory[(machine_reg) + offset] = curval */
338 spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset);
339 }
340 else {
341 /* Store: memory[(machine_reg) + offset] = reg */
342 spe_stqd(gen->f, value_reg, gen->outputs_reg, offset);
343 }
344 }
345 break;
346 default:
347 assert(0);
348 }
349 }
350
351
352 static boolean
353 emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst)
354 {
355 int ch;
356 spe_comment(gen->f, -4, "MOV:");
357 for (ch = 0; ch < 4; ch++) {
358 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
359 int src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
360 int dst_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
361 /* XXX we don't always need to actually emit a mov instruction here */
362 spe_move(gen->f, dst_reg, src_reg);
363 store_dest_reg(gen, dst_reg, ch, &inst->FullDstRegisters[0]);
364 free_itemps(gen);
365 }
366 }
367 return true;
368 }
369
370 /**
371 * Emit addition instructions. Recall that a single TGSI_OPCODE_ADD
372 * becomes (up to) four SPU "fa" instructions because we're doing SOA
373 * processing.
374 */
375 static boolean
376 emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst)
377 {
378 int ch;
379 spe_comment(gen->f, -4, "ADD:");
380 /* Loop over Red/Green/Blue/Alpha channels */
381 for (ch = 0; ch < 4; ch++) {
382 /* If the dest R, G, B or A writemask is enabled... */
383 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
384 /* get indexes of the two src, one dest SPE registers */
385 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
386 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
387 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
388
389 /* Emit actual SPE instruction: d = s1 + s2 */
390 spe_fa(gen->f, d_reg, s1_reg, s2_reg);
391
392 /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
393 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
394 /* Free any intermediate temps we allocated */
395 free_itemps(gen);
396 }
397 }
398 return true;
399 }
400
401 /**
402 * Emit subtract. See emit_ADD for comments.
403 */
404 static boolean
405 emit_SUB(struct codegen *gen, const struct tgsi_full_instruction *inst)
406 {
407 int ch;
408 spe_comment(gen->f, -4, "SUB:");
409 /* Loop over Red/Green/Blue/Alpha channels */
410 for (ch = 0; ch < 4; ch++) {
411 /* If the dest R, G, B or A writemask is enabled... */
412 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
413 /* get indexes of the two src, one dest SPE registers */
414 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
415 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
416 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
417
418 /* Emit actual SPE instruction: d = s1 - s2 */
419 spe_fs(gen->f, d_reg, s1_reg, s2_reg);
420
421 /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
422 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
423 /* Free any intermediate temps we allocated */
424 free_itemps(gen);
425 }
426 }
427 return true;
428 }
429
430 /**
431 * Emit multiply add. See emit_ADD for comments.
432 */
433 static boolean
434 emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst)
435 {
436 int ch;
437 spe_comment(gen->f, -4, "MAD:");
438 for (ch = 0; ch < 4; ch++) {
439 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
440 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
441 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
442 int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
443 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
444 /* d = s1 * s2 + s3 */
445 spe_fma(gen->f, d_reg, s1_reg, s2_reg, s3_reg);
446 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
447 free_itemps(gen);
448 }
449 }
450 return true;
451 }
452
453
454 /**
455 * Emit linear interpolate. See emit_ADD for comments.
456 */
457 static boolean
458 emit_LERP(struct codegen *gen, const struct tgsi_full_instruction *inst)
459 {
460 int ch;
461 spe_comment(gen->f, -4, "LERP:");
462 for (ch = 0; ch < 4; ch++) {
463 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
464 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
465 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
466 int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
467 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
468 /* d = s3 + s1(s2 - s3) */
469 spe_fs(gen->f, d_reg, s2_reg, s3_reg);
470 spe_fma(gen->f, d_reg, d_reg, s1_reg, s3_reg);
471 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
472 free_itemps(gen);
473 }
474 }
475 return true;
476 }
477
478 /**
479 * Emit multiply. See emit_ADD for comments.
480 */
481 static boolean
482 emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst)
483 {
484 int ch;
485 spe_comment(gen->f, -4, "MUL:");
486 for (ch = 0; ch < 4; ch++) {
487 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
488 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
489 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
490 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
491 /* d = s1 * s2 */
492 spe_fm(gen->f, d_reg, s1_reg, s2_reg);
493 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
494 free_itemps(gen);
495 }
496 }
497 return true;
498 }
499
500 /**
501 * Emit reciprocal. See emit_ADD for comments.
502 */
503 static boolean
504 emit_RCP(struct codegen *gen, const struct tgsi_full_instruction *inst)
505 {
506 int ch;
507 spe_comment(gen->f, -4, "RCP:");
508 for (ch = 0; ch < 4; ch++) {
509 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
510 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
511 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
512 /* d = 1/s1 */
513 spe_frest(gen->f, d_reg, s1_reg);
514 spe_fi(gen->f, d_reg, s1_reg, d_reg);
515 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
516 free_itemps(gen);
517 }
518 }
519 return true;
520 }
521
522 /**
523 * Emit reciprocal sqrt. See emit_ADD for comments.
524 */
525 static boolean
526 emit_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst)
527 {
528 int ch;
529 spe_comment(gen->f, -4, "RSQ:");
530 for (ch = 0; ch < 4; ch++) {
531 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
532 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
533 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
534 /* d = 1/s1 */
535 spe_frsqest(gen->f, d_reg, s1_reg);
536 spe_fi(gen->f, d_reg, s1_reg, d_reg);
537 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
538 free_itemps(gen);
539 }
540 }
541 return true;
542 }
543
544 /**
545 * Emit absolute value. See emit_ADD for comments.
546 */
547 static boolean
548 emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst)
549 {
550 int ch;
551 spe_comment(gen->f, -4, "ABS:");
552 for (ch = 0; ch < 4; ch++) {
553 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
554 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
555 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
556 const int bit31mask_reg = get_itemp(gen);
557
558 /* mask with bit 31 set, the rest cleared */
559 spe_load_int(gen->f, bit31mask_reg, (1 << 31));
560
561 /* d = sign bit cleared in s1 */
562 spe_andc(gen->f, d_reg, s1_reg, bit31mask_reg);
563
564 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
565 free_itemps(gen);
566 }
567 }
568 return true;
569 }
570
571 /**
572 * Emit 3 component dot product. See emit_ADD for comments.
573 */
574 static boolean
575 emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst)
576 {
577 int ch;
578 spe_comment(gen->f, -4, "DP3:");
579
580 int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
581 int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
582 int tmp_reg = get_itemp(gen);
583 /* t = x0 * x1 */
584 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
585
586 s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
587 s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
588 /* t = y0 * y1 + t */
589 spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
590
591 s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
592 s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
593 /* t = z0 * z1 + t */
594 spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
595
596 for (ch = 0; ch < 4; ch++) {
597 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
598 store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]);
599 }
600 }
601
602 free_itemps(gen);
603 return true;
604 }
605
606 /**
607 * Emit 4 component dot product. See emit_ADD for comments.
608 */
609 static boolean
610 emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst)
611 {
612 int ch;
613 spe_comment(gen->f, -4, "DP4:");
614
615 int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
616 int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
617 int tmp_reg = get_itemp(gen);
618 /* t = x0 * x1 */
619 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
620
621 s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
622 s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
623 /* t = y0 * y1 + t */
624 spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
625
626 s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
627 s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
628 /* t = z0 * z1 + t */
629 spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
630
631 s1_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[0]);
632 s2_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]);
633 /* t = w0 * w1 + t */
634 spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
635
636 for (ch = 0; ch < 4; ch++) {
637 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
638 store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]);
639 }
640 }
641
642 free_itemps(gen);
643 return true;
644 }
645
646 /**
647 * Emit homogeneous dot product. See emit_ADD for comments.
648 */
649 static boolean
650 emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst)
651 {
652 int ch;
653 spe_comment(gen->f, -4, "DPH:");
654
655 int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
656 int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
657 int tmp_reg = get_itemp(gen);
658
659 /* t = x0 * x1 */
660 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
661
662 s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
663 s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
664 /* t = y0 * y1 + t */
665 spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
666
667 s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
668 s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
669 /* t = z0 * z1 + t */
670 spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
671
672 s2_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]);
673 /* t = w1 + t */
674 spe_fa(gen->f, tmp_reg, s2_reg, tmp_reg);
675
676 for (ch = 0; ch < 4; ch++) {
677 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
678 store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]);
679 }
680 }
681
682 free_itemps(gen);
683 return true;
684 }
685
686 /**
687 * Emit cross product. See emit_ADD for comments.
688 */
689 static boolean
690 emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst)
691 {
692 spe_comment(gen->f, -4, "XPD:");
693
694 int s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
695 int s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
696 int tmp_reg = get_itemp(gen);
697
698 /* t = z0 * y1 */
699 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
700
701 s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
702 s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
703 /* t = y0 * z1 - t */
704 spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
705
706 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_X)) {
707 store_dest_reg(gen, tmp_reg, CHAN_X, &inst->FullDstRegisters[0]);
708 }
709
710 s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
711 s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
712 /* t = x0 * z1 */
713 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
714
715 s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
716 s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
717 /* t = z0 * x1 - t */
718 spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
719
720 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Y)) {
721 store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->FullDstRegisters[0]);
722 }
723
724 s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
725 s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
726 /* t = y0 * x1 */
727 spe_fm(gen->f, tmp_reg, s1_reg, s2_reg);
728
729 s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
730 s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
731 /* t = x0 * y1 - t */
732 spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg);
733
734 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Z)) {
735 store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->FullDstRegisters[0]);
736 }
737
738 free_itemps(gen);
739 return true;
740 }
741
742 /**
743 * Emit set-if-greater-than.
744 * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as
745 * the result but OpenGL/TGSI needs 0.0 and 1.0 results.
746 * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND.
747 */
748 static boolean
749 emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst)
750 {
751 int ch;
752
753 spe_comment(gen->f, -4, "SGT:");
754
755 for (ch = 0; ch < 4; ch++) {
756 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
757 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
758 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
759 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
760
761 /* d = (s1 > s2) */
762 spe_fcgt(gen->f, d_reg, s1_reg, s2_reg);
763
764 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
765 /* d = d & one_reg */
766 spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
767
768 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
769 free_itemps(gen);
770 }
771 }
772
773 return true;
774 }
775
776 /**
777 * Emit set-if_less-then. See emit_SGT for comments.
778 */
779 static boolean
780 emit_SLT(struct codegen *gen, const struct tgsi_full_instruction *inst)
781 {
782 int ch;
783
784 spe_comment(gen->f, -4, "SLT:");
785
786 for (ch = 0; ch < 4; ch++) {
787 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
788 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
789 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
790 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
791
792 /* d = (s1 < s2) */
793 spe_fcgt(gen->f, d_reg, s2_reg, s1_reg);
794
795 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
796 /* d = d & one_reg */
797 spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
798
799 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
800 free_itemps(gen);
801 }
802 }
803
804 return true;
805 }
806
807 /**
808 * Emit set-if_greater-then-or-equal. See emit_SGT for comments.
809 */
810 static boolean
811 emit_SGE(struct codegen *gen, const struct tgsi_full_instruction *inst)
812 {
813 int ch;
814
815 spe_comment(gen->f, -4, "SGE:");
816
817 for (ch = 0; ch < 4; ch++) {
818 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
819 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
820 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
821 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
822
823 /* d = (s1 >= s2) */
824 spe_fcgt(gen->f, d_reg, s2_reg, s1_reg);
825
826 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
827 /* d = ~d & one_reg */
828 spe_andc(gen->f, d_reg, get_const_one_reg(gen), d_reg);
829
830 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
831 free_itemps(gen);
832 }
833 }
834
835 return true;
836 }
837
838 /**
839 * Emit set-if_less-then-or-equal. See emit_SGT for comments.
840 */
841 static boolean
842 emit_SLE(struct codegen *gen, const struct tgsi_full_instruction *inst)
843 {
844 int ch;
845
846 spe_comment(gen->f, -4, "SLE:");
847
848 for (ch = 0; ch < 4; ch++) {
849 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
850 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
851 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
852 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
853
854 /* d = (s1 <= s2) */
855 spe_fcgt(gen->f, d_reg, s1_reg, s2_reg);
856
857 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
858 /* d = ~d & one_reg */
859 spe_andc(gen->f, d_reg, get_const_one_reg(gen), d_reg);
860
861 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
862 free_itemps(gen);
863 }
864 }
865
866 return true;
867 }
868
869 /**
870 * Emit set-if_equal. See emit_SGT for comments.
871 */
872 static boolean
873 emit_SEQ(struct codegen *gen, const struct tgsi_full_instruction *inst)
874 {
875 int ch;
876
877 spe_comment(gen->f, -4, "SEQ:");
878
879 for (ch = 0; ch < 4; ch++) {
880 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
881 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
882 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
883 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
884
885 /* d = (s1 == s2) */
886 spe_fceq(gen->f, d_reg, s1_reg, s2_reg);
887
888 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
889 /* d = d & one_reg */
890 spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
891
892 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
893 free_itemps(gen);
894 }
895 }
896
897 return true;
898 }
899
900 /**
901 * Emit set-if_not_equal. See emit_SGT for comments.
902 */
903 static boolean
904 emit_SNE(struct codegen *gen, const struct tgsi_full_instruction *inst)
905 {
906 int ch;
907
908 spe_comment(gen->f, -4, "SNE:");
909
910 for (ch = 0; ch < 4; ch++) {
911 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
912 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
913 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
914 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
915
916 /* d = (s1 != s2) */
917 spe_fceq(gen->f, d_reg, s1_reg, s2_reg);
918 spe_nor(gen->f, d_reg, d_reg, d_reg);
919
920 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
921 /* d = d & one_reg */
922 spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
923
924 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
925 free_itemps(gen);
926 }
927 }
928
929 return true;
930 }
931
932 /**
933 * Emit compare. See emit_SGT for comments.
934 */
935 static boolean
936 emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst)
937 {
938 int ch;
939
940 spe_comment(gen->f, -4, "CMP:");
941
942 for (ch = 0; ch < 4; ch++) {
943 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
944 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
945 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
946 int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
947 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
948 int zero_reg = get_itemp(gen);
949
950 spe_xor(gen->f, zero_reg, zero_reg, zero_reg);
951
952 /* d = (s1 < 0) ? s2 : s3 */
953 spe_fcgt(gen->f, d_reg, zero_reg, s1_reg);
954 spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg);
955
956 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
957 free_itemps(gen);
958 }
959 }
960
961 return true;
962 }
963
964 /**
965 * Emit trunc.
966 * Convert float to signed int
967 * Convert signed int to float
968 */
969 static boolean
970 emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst)
971 {
972 int ch;
973
974 spe_comment(gen->f, -4, "TRUNC:");
975
976 for (ch = 0; ch < 4; ch++) {
977 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
978 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
979 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
980
981 /* Convert float to int */
982 spe_cflts(gen->f, d_reg, s1_reg, 0);
983
984 /* Convert int to float */
985 spe_csflt(gen->f, d_reg, d_reg, 0);
986
987 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
988 free_itemps(gen);
989 }
990 }
991
992 return true;
993 }
994
995 /**
996 * Emit floor.
997 * If negative int subtract one
998 * Convert float to signed int
999 * Convert signed int to float
1000 */
1001 static boolean
1002 emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst)
1003 {
1004 int ch;
1005
1006 spe_comment(gen->f, -4, "FLR:");
1007
1008 int zero_reg = get_itemp(gen);
1009 spe_xor(gen->f, zero_reg, zero_reg, zero_reg);
1010
1011 for (ch = 0; ch < 4; ch++) {
1012 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1013 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1014 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1015 int tmp_reg = get_itemp(gen);
1016
1017 /* If negative, subtract 1.0 */
1018 spe_fcgt(gen->f, d_reg, zero_reg, s1_reg);
1019 spe_selb(gen->f, tmp_reg, zero_reg, get_const_one_reg(gen), d_reg);
1020 spe_fs(gen->f, d_reg, s1_reg, tmp_reg);
1021
1022 /* Convert float to int */
1023 spe_cflts(gen->f, d_reg, d_reg, 0);
1024
1025 /* Convert int to float */
1026 spe_csflt(gen->f, d_reg, d_reg, 0);
1027
1028 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1029 free_itemps(gen);
1030 }
1031 }
1032
1033 return true;
1034 }
1035
1036 /**
1037 * Emit frac.
1038 * Input - FLR(Input)
1039 */
1040 static boolean
1041 emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst)
1042 {
1043 int ch;
1044
1045 spe_comment(gen->f, -4, "FLR:");
1046
1047 int zero_reg = get_itemp(gen);
1048 spe_xor(gen->f, zero_reg, zero_reg, zero_reg);
1049
1050 for (ch = 0; ch < 4; ch++) {
1051 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1052 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1053 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1054 int tmp_reg = get_itemp(gen);
1055
1056 /* If negative, subtract 1.0 */
1057 spe_fcgt(gen->f, d_reg, zero_reg, s1_reg);
1058 spe_selb(gen->f, tmp_reg, zero_reg, get_const_one_reg(gen), d_reg);
1059 spe_fs(gen->f, d_reg, s1_reg, tmp_reg);
1060
1061 /* Convert float to int */
1062 spe_cflts(gen->f, d_reg, d_reg, 0);
1063
1064 /* Convert int to float */
1065 spe_csflt(gen->f, d_reg, d_reg, 0);
1066
1067 /* d = s1 - FLR(s1) */
1068 spe_fs(gen->f, d_reg, s1_reg, d_reg);
1069
1070 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1071 free_itemps(gen);
1072 }
1073 }
1074
1075 return true;
1076 }
1077
1078
1079 /**
1080 * Emit max. See emit_SGT for comments.
1081 */
1082 static boolean
1083 emit_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst)
1084 {
1085 int ch;
1086
1087 spe_comment(gen->f, -4, "MAX:");
1088
1089 for (ch = 0; ch < 4; ch++) {
1090 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1091 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1092 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
1093 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1094
1095 /* d = (s1 > s2) ? s1 : s2 */
1096 spe_fcgt(gen->f, d_reg, s1_reg, s2_reg);
1097 spe_selb(gen->f, d_reg, s2_reg, s1_reg, d_reg);
1098
1099 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1100 free_itemps(gen);
1101 }
1102 }
1103
1104 return true;
1105 }
1106
1107 /**
1108 * Emit max. See emit_SGT for comments.
1109 */
1110 static boolean
1111 emit_MIN(struct codegen *gen, const struct tgsi_full_instruction *inst)
1112 {
1113 int ch;
1114
1115 spe_comment(gen->f, -4, "MIN:");
1116
1117 for (ch = 0; ch < 4; ch++) {
1118 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1119 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1120 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
1121 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1122
1123 /* d = (s2 > s1) ? s1 : s2 */
1124 spe_fcgt(gen->f, d_reg, s2_reg, s1_reg);
1125 spe_selb(gen->f, d_reg, s2_reg, s1_reg, d_reg);
1126
1127 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1128 free_itemps(gen);
1129 }
1130 }
1131
1132 return true;
1133 }
1134
1135 static boolean
1136 emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst)
1137 {
1138 const int channel = 0;
1139 const int exec_reg = get_exec_mask_reg(gen);
1140
1141 spe_comment(gen->f, -4, "IF:");
1142
1143 /* update execution mask with the predicate register */
1144 int tmp_reg = get_itemp(gen);
1145 int s1_reg = get_src_reg(gen, channel, &inst->FullSrcRegisters[0]);
1146
1147 /* tmp = (s1_reg == 0) */
1148 spe_ceqi(gen->f, tmp_reg, s1_reg, 0);
1149 /* tmp = !tmp */
1150 spe_complement(gen->f, tmp_reg, tmp_reg);
1151 /* exec_mask = exec_mask & tmp */
1152 spe_and(gen->f, exec_reg, exec_reg, tmp_reg);
1153
1154 gen->if_nesting++;
1155
1156 free_itemps(gen);
1157
1158 return true;
1159 }
1160
1161
1162 static boolean
1163 emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst)
1164 {
1165 const int exec_reg = get_exec_mask_reg(gen);
1166
1167 spe_comment(gen->f, -4, "ELSE:");
1168
1169 /* exec_mask = !exec_mask */
1170 spe_complement(gen->f, exec_reg, exec_reg);
1171
1172 return true;
1173 }
1174
1175
1176 static boolean
1177 emit_ENDIF(struct codegen *gen, const struct tgsi_full_instruction *inst)
1178 {
1179 const int exec_reg = get_exec_mask_reg(gen);
1180
1181 spe_comment(gen->f, -4, "ENDIF:");
1182
1183 /* XXX todo: pop execution mask */
1184
1185 spe_load_int(gen->f, exec_reg, ~0x0);
1186
1187 gen->if_nesting--;
1188 return true;
1189 }
1190
1191
1192 static boolean
1193 emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst,
1194 boolean ddx)
1195 {
1196 int ch;
1197
1198 spe_comment(gen->f, -4, ddx ? "DDX:" : "DDY:");
1199
1200 for (ch = 0; ch < 4; ch++) {
1201 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1202 int s_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1203 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1204
1205 int t1_reg = get_itemp(gen);
1206 int t2_reg = get_itemp(gen);
1207
1208 spe_splat_word(gen->f, t1_reg, s_reg, 0); /* upper-left pixel */
1209 if (ddx) {
1210 spe_splat_word(gen->f, t2_reg, s_reg, 1); /* upper-right pixel */
1211 }
1212 else {
1213 spe_splat_word(gen->f, t2_reg, s_reg, 2); /* lower-left pixel */
1214 }
1215 spe_fs(gen->f, d_reg, t2_reg, t1_reg);
1216
1217 free_itemps(gen);
1218 }
1219 }
1220
1221 return true;
1222 }
1223
1224
1225
1226
1227 /**
1228 * Emit END instruction.
1229 * We just return from the shader function at this point.
1230 *
1231 * Note that there may be more code after this that would be
1232 * called by TGSI_OPCODE_CALL.
1233 */
1234 static boolean
1235 emit_END(struct codegen *gen)
1236 {
1237 spe_comment(gen->f, -4, "END:");
1238 /* return from function call */
1239 spe_bi(gen->f, SPE_REG_RA, 0, 0);
1240 return true;
1241 }
1242
1243
1244 /**
1245 * Emit code for the given instruction. Just a big switch stmt.
1246 */
1247 static boolean
1248 emit_instruction(struct codegen *gen,
1249 const struct tgsi_full_instruction *inst)
1250 {
1251 switch (inst->Instruction.Opcode) {
1252 case TGSI_OPCODE_MOV:
1253 case TGSI_OPCODE_SWZ:
1254 return emit_MOV(gen, inst);
1255 case TGSI_OPCODE_MUL:
1256 return emit_MUL(gen, inst);
1257 case TGSI_OPCODE_ADD:
1258 return emit_ADD(gen, inst);
1259 case TGSI_OPCODE_SUB:
1260 return emit_SUB(gen, inst);
1261 case TGSI_OPCODE_MAD:
1262 return emit_MAD(gen, inst);
1263 case TGSI_OPCODE_LERP:
1264 return emit_LERP(gen, inst);
1265 case TGSI_OPCODE_DP3:
1266 return emit_DP3(gen, inst);
1267 case TGSI_OPCODE_DP4:
1268 return emit_DP4(gen, inst);
1269 case TGSI_OPCODE_DPH:
1270 return emit_DPH(gen, inst);
1271 case TGSI_OPCODE_XPD:
1272 return emit_XPD(gen, inst);
1273 case TGSI_OPCODE_RCP:
1274 return emit_RCP(gen, inst);
1275 case TGSI_OPCODE_RSQ:
1276 return emit_RSQ(gen, inst);
1277 case TGSI_OPCODE_ABS:
1278 return emit_ABS(gen, inst);
1279 case TGSI_OPCODE_SGT:
1280 return emit_SGT(gen, inst);
1281 case TGSI_OPCODE_SLT:
1282 return emit_SLT(gen, inst);
1283 case TGSI_OPCODE_SGE:
1284 return emit_SGE(gen, inst);
1285 case TGSI_OPCODE_SLE:
1286 return emit_SLE(gen, inst);
1287 case TGSI_OPCODE_SEQ:
1288 return emit_SEQ(gen, inst);
1289 case TGSI_OPCODE_SNE:
1290 return emit_SNE(gen, inst);
1291 case TGSI_OPCODE_CMP:
1292 return emit_CMP(gen, inst);
1293 case TGSI_OPCODE_MAX:
1294 return emit_MAX(gen, inst);
1295 case TGSI_OPCODE_MIN:
1296 return emit_MIN(gen, inst);
1297 case TGSI_OPCODE_TRUNC:
1298 return emit_TRUNC(gen, inst);
1299 case TGSI_OPCODE_FLR:
1300 return emit_FLR(gen, inst);
1301 case TGSI_OPCODE_FRC:
1302 return emit_FRC(gen, inst);
1303 case TGSI_OPCODE_END:
1304 return emit_END(gen);
1305
1306 case TGSI_OPCODE_IF:
1307 return emit_IF(gen, inst);
1308 case TGSI_OPCODE_ELSE:
1309 return emit_ELSE(gen, inst);
1310 case TGSI_OPCODE_ENDIF:
1311 return emit_ENDIF(gen, inst);
1312
1313 case TGSI_OPCODE_DDX:
1314 return emit_DDX_DDY(gen, inst, true);
1315 case TGSI_OPCODE_DDY:
1316 return emit_DDX_DDY(gen, inst, false);
1317
1318 /* XXX lots more cases to do... */
1319
1320 default:
1321 fprintf(stderr, "Cell: unimplemented TGSI instruction %d!\n",
1322 inst->Instruction.Opcode);
1323 return false;
1324 }
1325
1326 return true;
1327 }
1328
1329
1330
1331 /**
1332 * Emit code for a TGSI immediate value (vector of four floats).
1333 * This involves register allocation and initialization.
1334 * XXX the initialization should be done by a "prepare" stage, not
1335 * per quad execution!
1336 */
1337 static boolean
1338 emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed)
1339 {
1340 int ch;
1341
1342 assert(gen->num_imm < MAX_TEMPS);
1343
1344 spe_comment(gen->f, -4, "IMMEDIATE:");
1345
1346 for (ch = 0; ch < 4; ch++) {
1347 float val = immed->u.ImmediateFloat32[ch].Float;
1348 int reg = spe_allocate_available_register(gen->f);
1349
1350 if (reg < 0)
1351 return false;
1352
1353 /* update immediate map */
1354 gen->imm_regs[gen->num_imm][ch] = reg;
1355
1356 /* emit initializer instruction */
1357 spe_load_float(gen->f, reg, val);
1358 }
1359
1360 gen->num_imm++;
1361
1362 return true;
1363 }
1364
1365
1366
1367 /**
1368 * Emit "code" for a TGSI declaration.
1369 * We only care about TGSI TEMPORARY register declarations at this time.
1370 * For each TGSI TEMPORARY we allocate four SPE registers.
1371 */
1372 static boolean
1373 emit_declaration(struct cell_context *cell,
1374 struct codegen *gen, const struct tgsi_full_declaration *decl)
1375 {
1376 int i, ch;
1377
1378 switch (decl->Declaration.File) {
1379 case TGSI_FILE_TEMPORARY:
1380 if (cell->debug_flags & CELL_DEBUG_ASM) {
1381 printf("Declare temp reg %d .. %d\n",
1382 decl->DeclarationRange.First,
1383 decl->DeclarationRange.Last);
1384 }
1385
1386 for (i = decl->DeclarationRange.First;
1387 i <= decl->DeclarationRange.Last;
1388 i++) {
1389 assert(i < MAX_TEMPS);
1390 for (ch = 0; ch < 4; ch++) {
1391 gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f);
1392 if (gen->temp_regs[i][ch] < 0)
1393 return false; /* out of regs */
1394 }
1395
1396 /* XXX if we run out of SPE registers, we need to spill
1397 * to SPU memory. someday...
1398 */
1399
1400 if (cell->debug_flags & CELL_DEBUG_ASM) {
1401 printf(" SPE regs: %d %d %d %d\n",
1402 gen->temp_regs[i][0],
1403 gen->temp_regs[i][1],
1404 gen->temp_regs[i][2],
1405 gen->temp_regs[i][3]);
1406 }
1407 }
1408 break;
1409 default:
1410 ; /* ignore */
1411 }
1412
1413 return true;
1414 }
1415
1416
1417 /**
1418 * Translate TGSI shader code to SPE instructions. This is done when
1419 * the state tracker gives us a new shader (via pipe->create_fs_state()).
1420 *
1421 * \param cell the rendering context (in)
1422 * \param tokens the TGSI shader (in)
1423 * \param f the generated function (out)
1424 */
1425 boolean
1426 cell_gen_fragment_program(struct cell_context *cell,
1427 const struct tgsi_token *tokens,
1428 struct spe_function *f)
1429 {
1430 struct tgsi_parse_context parse;
1431 struct codegen gen;
1432
1433 memset(&gen, 0, sizeof(gen));
1434 gen.f = f;
1435
1436 /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
1437 gen.inputs_reg = 3; /* pointer to inputs array */
1438 gen.outputs_reg = 4; /* pointer to outputs array */
1439 gen.constants_reg = 5; /* pointer to constants array */
1440
1441 spe_init_func(f, SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE);
1442 spe_allocate_register(f, gen.inputs_reg);
1443 spe_allocate_register(f, gen.outputs_reg);
1444 spe_allocate_register(f, gen.constants_reg);
1445
1446 if (cell->debug_flags & CELL_DEBUG_ASM) {
1447 spe_print_code(f, true);
1448 spe_indent(f, 8);
1449 printf("Begin %s\n", __FUNCTION__);
1450 tgsi_dump(tokens, 0);
1451 }
1452
1453 tgsi_parse_init(&parse, tokens);
1454
1455 while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) {
1456 tgsi_parse_token(&parse);
1457
1458 switch (parse.FullToken.Token.Type) {
1459 case TGSI_TOKEN_TYPE_IMMEDIATE:
1460 if (!emit_immediate(&gen, &parse.FullToken.FullImmediate))
1461 gen.error = true;
1462 break;
1463
1464 case TGSI_TOKEN_TYPE_DECLARATION:
1465 if (!emit_declaration(cell, &gen, &parse.FullToken.FullDeclaration))
1466 gen.error = true;
1467 break;
1468
1469 case TGSI_TOKEN_TYPE_INSTRUCTION:
1470 if (!emit_instruction(&gen, &parse.FullToken.FullInstruction))
1471 gen.error = true;
1472 break;
1473
1474 default:
1475 assert(0);
1476 }
1477 }
1478
1479
1480 if (gen.error) {
1481 /* terminate the SPE code */
1482 return emit_END(&gen);
1483 }
1484
1485 if (cell->debug_flags & CELL_DEBUG_ASM) {
1486 printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst);
1487 printf("End %s\n", __FUNCTION__);
1488 }
1489
1490 tgsi_parse_free( &parse );
1491
1492 return !gen.error;
1493 }