cell: Added DPH instruction and verified against softpipe.
[mesa.git] / src / gallium / drivers / cell / ppu / cell_gen_fp.c
1 /**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29
30 /**
31 * Generate SPU fragment program/shader code.
32 *
33 * Note that we generate SOA-style code here. So each TGSI instruction
34 * operates on four pixels (and is translated into four SPU instructions,
35 * generally speaking).
36 *
37 * \author Brian Paul
38 */
39
40
41 #include "pipe/p_defines.h"
42 #include "pipe/p_state.h"
43 #include "pipe/p_shader_tokens.h"
44 #include "tgsi/tgsi_parse.h"
45 #include "tgsi/tgsi_util.h"
46 #include "tgsi/tgsi_exec.h"
47 #include "tgsi/tgsi_dump.h"
48 #include "rtasm/rtasm_ppc_spe.h"
49 #include "util/u_memory.h"
50 #include "cell_context.h"
51 #include "cell_gen_fp.h"
52
53
/** Number of TGSI temporaries that can be mapped to SPE registers */
#define MAX_TEMPS 16
/** Number of TGSI immediates that can be mapped to SPE registers */
#define MAX_IMMED 8

/** Channel indexes, as passed to get_src_reg() / get_dst_reg() */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3
61
/**
 * Context needed during code generation.
 * One instance is threaded through every emit_XXX() function below.
 */
struct codegen
{
   int inputs_reg;      /**< 1st function parameter */
   int outputs_reg;     /**< 2nd function parameter */
   int constants_reg;   /**< 3rd function parameter */
   int temp_regs[MAX_TEMPS][4]; /**< maps TGSI temps to SPE registers */
   int imm_regs[MAX_IMMED][4];  /**< maps TGSI immediates to SPE registers */

   int num_imm;  /**< number of immediates */

   /**
    * Register containing {1.0, 1.0, 1.0, 1.0}.  A value <= 0 means the
    * register has not been allocated yet; see get_const_one_reg().
    */
   int one_reg;

   /**
    * Per-instruction temps / intermediate temps.  Filled by get_itemp()
    * and released all at once by free_itemps().
    */
   int num_itemps;
   int itemps[10];

   /** Current IF/ELSE/ENDIF nesting level */
   int if_nesting;
   /**
    * Index of execution mask register.  A value <= 0 means the register
    * has not been allocated yet; see get_exec_mask_reg().
    */
   int exec_mask_reg;

   /** SPE function that all spe_xxx() emit calls append to */
   struct spe_function *f;
   /* NOTE(review): presumably set when code generation fails; no code in
    * this part of the file reads or writes it -- confirm against callers.
    */
   boolean error;
};
89
90
91 /**
92 * Allocate an intermediate temporary register.
93 */
94 static int
95 get_itemp(struct codegen *gen)
96 {
97 int t = spe_allocate_available_register(gen->f);
98 assert(gen->num_itemps < Elements(gen->itemps));
99 gen->itemps[gen->num_itemps++] = t;
100 return t;
101 }
102
103 /**
104 * Free all intermediate temporary registers. To be called after each
105 * instruction has been emitted.
106 */
107 static void
108 free_itemps(struct codegen *gen)
109 {
110 int i;
111 for (i = 0; i < gen->num_itemps; i++) {
112 spe_release_register(gen->f, gen->itemps[i]);
113 }
114 gen->num_itemps = 0;
115 }
116
117
118 /**
119 * Return index of an SPE register containing {1.0, 1.0, 1.0, 1.0}.
120 * The register is allocated and initialized upon the first call.
121 */
122 static int
123 get_const_one_reg(struct codegen *gen)
124 {
125 if (gen->one_reg <= 0) {
126 gen->one_reg = spe_allocate_available_register(gen->f);
127
128 spe_indent(gen->f, 4);
129 spe_comment(gen->f, -4, "INIT CONSTANT 1.0:");
130
131 /* one = {1.0, 1.0, 1.0, 1.0} */
132 spe_load_float(gen->f, gen->one_reg, 1.0f);
133
134 spe_indent(gen->f, -4);
135 }
136
137 return gen->one_reg;
138 }
139
140
141 /**
142 * Return index of the pixel execution mask.
143 * The register is allocated an initialized upon the first call.
144 *
145 * The pixel execution mask controls which pixels in a quad are
146 * modified, according to surrounding conditionals, loops, etc.
147 */
148 static int
149 get_exec_mask_reg(struct codegen *gen)
150 {
151 if (gen->exec_mask_reg <= 0) {
152 gen->exec_mask_reg = spe_allocate_available_register(gen->f);
153
154 spe_indent(gen->f, 4);
155 spe_comment(gen->f, -4, "INIT EXEC MASK = ~0:");
156
157 /* exec_mask = {~0, ~0, ~0, ~0} */
158 spe_load_int(gen->f, gen->exec_mask_reg, ~0);
159
160 spe_indent(gen->f, -4);
161 }
162
163 return gen->exec_mask_reg;
164 }
165
166
167 /**
168 * Return the index of the SPU temporary containing the named TGSI
169 * source register. If the TGSI register is a TGSI_FILE_TEMPORARY we
170 * just return the corresponding SPE register. If the TGIS register
171 * is TGSI_FILE_INPUT/CONSTANT/IMMEDIATE we allocate a new SPE register
172 * and emit an SPE load instruction.
173 */
174 static int
175 get_src_reg(struct codegen *gen,
176 int channel,
177 const struct tgsi_full_src_register *src)
178 {
179 int reg = -1;
180 int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel);
181 boolean reg_is_itemp = FALSE;
182 uint sign_op;
183
184 assert(swizzle >= 0);
185 assert(swizzle <= 3);
186
187 channel = swizzle;
188
189 switch (src->SrcRegister.File) {
190 case TGSI_FILE_TEMPORARY:
191 reg = gen->temp_regs[src->SrcRegister.Index][channel];
192 break;
193 case TGSI_FILE_INPUT:
194 {
195 /* offset is measured in quadwords, not bytes */
196 int offset = src->SrcRegister.Index * 4 + channel;
197 reg = get_itemp(gen);
198 reg_is_itemp = TRUE;
199 /* Load: reg = memory[(machine_reg) + offset] */
200 spe_lqd(gen->f, reg, gen->inputs_reg, offset);
201 }
202 break;
203 case TGSI_FILE_IMMEDIATE:
204 reg = gen->imm_regs[src->SrcRegister.Index][channel];
205 break;
206 case TGSI_FILE_CONSTANT:
207 /* xxx fall-through for now / fix */
208 default:
209 assert(0);
210 }
211
212 /*
213 * Handle absolute value, negate or set-negative of src register.
214 */
215 sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel);
216 if (sign_op != TGSI_UTIL_SIGN_KEEP) {
217 /*
218 * All sign ops are done by manipulating bit 31, the IEEE float sign bit.
219 */
220 const int bit31mask_reg = get_itemp(gen);
221 int result_reg;
222
223 if (reg_is_itemp) {
224 /* re-use 'reg' for the result */
225 result_reg = reg;
226 }
227 else {
228 /* alloc a new reg for the result */
229 result_reg = get_itemp(gen);
230 }
231
232 /* mask with bit 31 set, the rest cleared */
233 spe_load_int(gen->f, bit31mask_reg, (1 << 31));
234
235 if (sign_op == TGSI_UTIL_SIGN_CLEAR) {
236 spe_andc(gen->f, result_reg, reg, bit31mask_reg);
237 }
238 else if (sign_op == TGSI_UTIL_SIGN_SET) {
239 spe_and(gen->f, result_reg, reg, bit31mask_reg);
240 }
241 else {
242 assert(sign_op == TGSI_UTIL_SIGN_TOGGLE);
243 spe_xor(gen->f, result_reg, reg, bit31mask_reg);
244 }
245
246 reg = result_reg;
247 }
248
249 return reg;
250 }
251
252
253 /**
254 * Return the index of an SPE register to use for the given TGSI register.
255 * If the TGSI register is TGSI_FILE_TEMPORARAY, the index of the
256 * corresponding SPE register is returned. If the TGSI register is
257 * TGSI_FILE_OUTPUT we allocate an intermediate temporary register.
258 * See store_dest_reg() below...
259 */
260 static int
261 get_dst_reg(struct codegen *gen,
262 int channel,
263 const struct tgsi_full_dst_register *dest)
264 {
265 int reg = -1;
266
267 switch (dest->DstRegister.File) {
268 case TGSI_FILE_TEMPORARY:
269 if (gen->if_nesting > 0)
270 reg = get_itemp(gen);
271 else
272 reg = gen->temp_regs[dest->DstRegister.Index][channel];
273 break;
274 case TGSI_FILE_OUTPUT:
275 reg = get_itemp(gen);
276 break;
277 default:
278 assert(0);
279 }
280
281 return reg;
282 }
283
284
285 /**
286 * When a TGSI instruction is writing to an output register, this
287 * function emits the SPE store instruction to store the value_reg.
288 * \param value_reg the SPE register containing the value to store.
289 * This would have been returned by get_dst_reg().
290 */
291 static void
292 store_dest_reg(struct codegen *gen,
293 int value_reg, int channel,
294 const struct tgsi_full_dst_register *dest)
295 {
296 switch (dest->DstRegister.File) {
297 case TGSI_FILE_TEMPORARY:
298 if (gen->if_nesting > 0) {
299 int d_reg = gen->temp_regs[dest->DstRegister.Index][channel];
300 int exec_reg = get_exec_mask_reg(gen);
301 /* Mix d with new value according to exec mask:
302 * d[i] = mask_reg[i] ? value_reg : d_reg
303 */
304 spe_selb(gen->f, d_reg, d_reg, value_reg, exec_reg);
305 }
306 else {
307 /* we're not inside a condition or loop: do nothing special */
308 }
309 break;
310 case TGSI_FILE_OUTPUT:
311 {
312 /* offset is measured in quadwords, not bytes */
313 int offset = dest->DstRegister.Index * 4 + channel;
314 if (gen->if_nesting > 0) {
315 int exec_reg = get_exec_mask_reg(gen);
316 int curval_reg = get_itemp(gen);
317 /* First read the current value from memory:
318 * Load: curval = memory[(machine_reg) + offset]
319 */
320 spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset);
321 /* Mix curval with newvalue according to exec mask:
322 * d[i] = mask_reg[i] ? value_reg : d_reg
323 */
324 spe_selb(gen->f, curval_reg, curval_reg, value_reg, exec_reg);
325 /* Store: memory[(machine_reg) + offset] = curval */
326 spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset);
327 }
328 else {
329 /* Store: memory[(machine_reg) + offset] = reg */
330 spe_stqd(gen->f, value_reg, gen->outputs_reg, offset);
331 }
332 }
333 break;
334 default:
335 assert(0);
336 }
337 }
338
339
340 static boolean
341 emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst)
342 {
343 int ch;
344 spe_comment(gen->f, -4, "MOV:");
345 for (ch = 0; ch < 4; ch++) {
346 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
347 int src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
348 int dst_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
349 /* XXX we don't always need to actually emit a mov instruction here */
350 spe_move(gen->f, dst_reg, src_reg);
351 store_dest_reg(gen, dst_reg, ch, &inst->FullDstRegisters[0]);
352 free_itemps(gen);
353 }
354 }
355 return true;
356 }
357
358
359
360 /**
361 * Emit addition instructions. Recall that a single TGSI_OPCODE_ADD
362 * becomes (up to) four SPU "fa" instructions because we're doing SOA
363 * processing.
364 */
365 static boolean
366 emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst)
367 {
368 int ch;
369 spe_comment(gen->f, -4, "ADD:");
370 /* Loop over Red/Green/Blue/Alpha channels */
371 for (ch = 0; ch < 4; ch++) {
372 /* If the dest R, G, B or A writemask is enabled... */
373 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
374 /* get indexes of the two src, one dest SPE registers */
375 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
376 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
377 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
378
379 /* Emit actual SPE instruction: d = s1 + s2 */
380 spe_fa(gen->f, d_reg, s1_reg, s2_reg);
381
382 /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
383 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
384 /* Free any intermediate temps we allocated */
385 free_itemps(gen);
386 }
387 }
388 return true;
389 }
390
391 /**
392 * Emit subtract. See emit_ADD for comments.
393 */
394 static boolean
395 emit_SUB(struct codegen *gen, const struct tgsi_full_instruction *inst)
396 {
397 int ch;
398 spe_comment(gen->f, -4, "SUB:");
399 /* Loop over Red/Green/Blue/Alpha channels */
400 for (ch = 0; ch < 4; ch++) {
401 /* If the dest R, G, B or A writemask is enabled... */
402 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
403 /* get indexes of the two src, one dest SPE registers */
404 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
405 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
406 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
407
408 /* Emit actual SPE instruction: d = s1 - s2 */
409 spe_fs(gen->f, d_reg, s1_reg, s2_reg);
410
411 /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */
412 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
413 /* Free any intermediate temps we allocated */
414 free_itemps(gen);
415 }
416 }
417 return true;
418 }
419
420 /**
421 * Emit multiply add. See emit_ADD for comments.
422 */
423 static boolean
424 emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst)
425 {
426 int ch;
427 spe_comment(gen->f, -4, "MAD:");
428 for (ch = 0; ch < 4; ch++) {
429 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
430 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
431 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
432 int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
433 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
434 /* d = s1 * s2 + s3 */
435 spe_fma(gen->f, d_reg, s1_reg, s2_reg, s3_reg);
436 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
437 free_itemps(gen);
438 }
439 }
440 return true;
441 }
442
443
444 /**
445 * Emit linear interpolate. See emit_ADD for comments.
446 */
447 static boolean
448 emit_LERP(struct codegen *gen, const struct tgsi_full_instruction *inst)
449 {
450 int ch;
451 spe_comment(gen->f, -4, "LERP:");
452 for (ch = 0; ch < 4; ch++) {
453 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
454 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
455 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
456 int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
457 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
458 /* d = s3 + s1(s2 - s3) */
459 spe_fs(gen->f, d_reg, s2_reg, s3_reg);
460 spe_fma(gen->f, d_reg, d_reg, s1_reg, s3_reg);
461 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
462 free_itemps(gen);
463 }
464 }
465 return true;
466 }
467
468 /**
469 * Emit multiply. See emit_ADD for comments.
470 */
471 static boolean
472 emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst)
473 {
474 int ch;
475 spe_comment(gen->f, -4, "MUL:");
476 for (ch = 0; ch < 4; ch++) {
477 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
478 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
479 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
480 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
481 /* d = s1 * s2 */
482 spe_fm(gen->f, d_reg, s1_reg, s2_reg);
483 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
484 free_itemps(gen);
485 }
486 }
487 return true;
488 }
489
490 /**
491 * Emit reciprocal. See emit_ADD for comments.
492 */
493 static boolean
494 emit_RCP(struct codegen *gen, const struct tgsi_full_instruction *inst)
495 {
496 int ch;
497 spe_comment(gen->f, -4, "RCP:");
498 for (ch = 0; ch < 4; ch++) {
499 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
500 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
501 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
502 /* d = 1/s1 */
503 spe_frest(gen->f, d_reg, s1_reg);
504 spe_fi(gen->f, d_reg, s1_reg, d_reg);
505 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
506 free_itemps(gen);
507 }
508 }
509 return true;
510 }
511
512 /**
513 * Emit reciprocal sqrt. See emit_ADD for comments.
514 */
515 static boolean
516 emit_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst)
517 {
518 int ch;
519 spe_comment(gen->f, -4, "RSQ:");
520 for (ch = 0; ch < 4; ch++) {
521 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
522 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
523 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
524 /* d = 1/s1 */
525 spe_frsqest(gen->f, d_reg, s1_reg);
526 spe_fi(gen->f, d_reg, s1_reg, d_reg);
527 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
528 free_itemps(gen);
529 }
530 }
531 return true;
532 }
533
534 /**
535 * Emit absolute value. See emit_ADD for comments.
536 */
537 static boolean
538 emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst)
539 {
540 int ch;
541 spe_comment(gen->f, -4, "ABS:");
542 for (ch = 0; ch < 4; ch++) {
543 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
544 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
545 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
546 const int bit31mask_reg = get_itemp(gen);
547
548 /* mask with bit 31 set, the rest cleared */
549 spe_load_int(gen->f, bit31mask_reg, (1 << 31));
550
551 /* d = sign bit cleared in s1 */
552 spe_andc(gen->f, d_reg, s1_reg, bit31mask_reg);
553
554 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
555 free_itemps(gen);
556 }
557 }
558 return true;
559 }
560
561 /**
562 * Emit 3 component dot product. See emit_ADD for comments.
563 */
564 static boolean
565 emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst)
566 {
567 int ch;
568 spe_comment(gen->f, -4, "DP3:");
569
570 int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
571 int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
572 int d_reg = get_dst_reg(gen, CHAN_X, &inst->FullDstRegisters[0]);
573 /* d = x * x */
574 spe_fm(gen->f, d_reg, s1_reg, s2_reg);
575
576 s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
577 s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
578 /* d = y * y + d */
579 spe_fma(gen->f, d_reg, s1_reg, s2_reg, d_reg);
580
581 s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
582 s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
583 /* d = z * z + d */
584 spe_fma(gen->f, d_reg, s1_reg, s2_reg, d_reg);
585
586 for (ch = 0; ch < 4; ch++) {
587 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
588 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
589 }
590 }
591
592 free_itemps(gen);
593 return true;
594 }
595
596 /**
597 * Emit 4 component dot product. See emit_ADD for comments.
598 */
599 static boolean
600 emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst)
601 {
602 int ch;
603 spe_comment(gen->f, -4, "DP3:");
604
605 int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
606 int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
607 int d_reg = get_dst_reg(gen, CHAN_X, &inst->FullDstRegisters[0]);
608 /* d = x * x */
609 spe_fm(gen->f, d_reg, s1_reg, s2_reg);
610
611 s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
612 s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
613 /* d = y * y + d */
614 spe_fma(gen->f, d_reg, s1_reg, s2_reg, d_reg);
615
616 s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
617 s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
618 /* d = z * z + d */
619 spe_fma(gen->f, d_reg, s1_reg, s2_reg, d_reg);
620
621 s1_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[0]);
622 s2_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]);
623 /* d = w * w + d */
624 spe_fma(gen->f, d_reg, s1_reg, s2_reg, d_reg);
625
626 for (ch = 0; ch < 4; ch++) {
627 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
628 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
629 }
630 }
631
632 free_itemps(gen);
633 return true;
634 }
635
636 /**
637 * Emit homogeneous dot product. See emit_ADD for comments.
638 */
639 static boolean
640 emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst)
641 {
642 int ch;
643 spe_comment(gen->f, -4, "DPH:");
644
645 int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]);
646 int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]);
647 int d_reg = get_dst_reg(gen, CHAN_X, &inst->FullDstRegisters[0]);
648 /* d = x * x */
649 spe_fm(gen->f, d_reg, s1_reg, s2_reg);
650
651 s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]);
652 s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]);
653 /* d = y * y + d */
654 spe_fma(gen->f, d_reg, s1_reg, s2_reg, d_reg);
655
656 s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]);
657 s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]);
658 /* d = z * z + d */
659 spe_fma(gen->f, d_reg, s1_reg, s2_reg, d_reg);
660
661 s2_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]);
662 /* d = w + d */
663 spe_fa(gen->f, d_reg, s2_reg, d_reg);
664
665 for (ch = 0; ch < 4; ch++) {
666 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
667 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
668 }
669 }
670
671 free_itemps(gen);
672 return true;
673 }
674
675 /**
676 * Emit set-if-greater-than.
677 * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as
678 * the result but OpenGL/TGSI needs 0.0 and 1.0 results.
679 * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND.
680 */
681 static boolean
682 emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst)
683 {
684 int ch;
685
686 spe_comment(gen->f, -4, "SGT:");
687
688 for (ch = 0; ch < 4; ch++) {
689 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
690 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
691 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
692 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
693
694 /* d = (s1 > s2) */
695 spe_fcgt(gen->f, d_reg, s1_reg, s2_reg);
696
697 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
698 /* d = d & one_reg */
699 spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
700
701 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
702 free_itemps(gen);
703 }
704 }
705
706 return true;
707 }
708
709 /**
710 * Emit set-if_less-then. See emit_SGT for comments.
711 */
712 static boolean
713 emit_SLT(struct codegen *gen, const struct tgsi_full_instruction *inst)
714 {
715 int ch;
716
717 spe_comment(gen->f, -4, "SLT:");
718
719 for (ch = 0; ch < 4; ch++) {
720 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
721 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
722 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
723 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
724
725 /* d = (s1 < s2) */
726 spe_fcgt(gen->f, d_reg, s2_reg, s1_reg);
727
728 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
729 /* d = d & one_reg */
730 spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
731
732 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
733 free_itemps(gen);
734 }
735 }
736
737 return true;
738 }
739
740 /**
741 * Emit set-if_greater-then-or-equal. See emit_SGT for comments.
742 */
743 static boolean
744 emit_SGE(struct codegen *gen, const struct tgsi_full_instruction *inst)
745 {
746 int ch;
747
748 spe_comment(gen->f, -4, "SGE:");
749
750 for (ch = 0; ch < 4; ch++) {
751 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
752 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
753 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
754 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
755
756 /* d = (s1 >= s2) */
757 spe_fcgt(gen->f, d_reg, s2_reg, s1_reg);
758
759 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
760 /* d = ~d & one_reg */
761 spe_andc(gen->f, d_reg, get_const_one_reg(gen), d_reg);
762
763 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
764 free_itemps(gen);
765 }
766 }
767
768 return true;
769 }
770
771 /**
772 * Emit set-if_less-then-or-equal. See emit_SGT for comments.
773 */
774 static boolean
775 emit_SLE(struct codegen *gen, const struct tgsi_full_instruction *inst)
776 {
777 int ch;
778
779 spe_comment(gen->f, -4, "SLE:");
780
781 for (ch = 0; ch < 4; ch++) {
782 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
783 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
784 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
785 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
786
787 /* d = (s1 <= s2) */
788 spe_fcgt(gen->f, d_reg, s1_reg, s2_reg);
789
790 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
791 /* d = ~d & one_reg */
792 spe_andc(gen->f, d_reg, get_const_one_reg(gen), d_reg);
793
794 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
795 free_itemps(gen);
796 }
797 }
798
799 return true;
800 }
801
802 /**
803 * Emit set-if_equal. See emit_SGT for comments.
804 */
805 static boolean
806 emit_SEQ(struct codegen *gen, const struct tgsi_full_instruction *inst)
807 {
808 int ch;
809
810 spe_comment(gen->f, -4, "SEQ:");
811
812 for (ch = 0; ch < 4; ch++) {
813 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
814 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
815 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
816 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
817
818 /* d = (s1 == s2) */
819 spe_fceq(gen->f, d_reg, s1_reg, s2_reg);
820
821 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
822 /* d = d & one_reg */
823 spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
824
825 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
826 free_itemps(gen);
827 }
828 }
829
830 return true;
831 }
832
833 /**
834 * Emit set-if_not_equal. See emit_SGT for comments.
835 */
836 static boolean
837 emit_SNE(struct codegen *gen, const struct tgsi_full_instruction *inst)
838 {
839 int ch;
840
841 spe_comment(gen->f, -4, "SNE:");
842
843 for (ch = 0; ch < 4; ch++) {
844 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
845 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
846 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
847 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
848
849 /* d = (s1 != s2) */
850 spe_fceq(gen->f, d_reg, s1_reg, s2_reg);
851 spe_nor(gen->f, d_reg, d_reg, d_reg);
852
853 /* convert d from 0x0/0xffffffff to 0.0/1.0 */
854 /* d = d & one_reg */
855 spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen));
856
857 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
858 free_itemps(gen);
859 }
860 }
861
862 return true;
863 }
864
865 /**
866 * Emit compare. See emit_SGT for comments.
867 */
868 static boolean
869 emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst)
870 {
871 int ch;
872
873 spe_comment(gen->f, -4, "CMP:");
874
875 for (ch = 0; ch < 4; ch++) {
876 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
877 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
878 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
879 int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]);
880 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
881 int zero_reg = get_itemp(gen);
882
883 spe_xor(gen->f, zero_reg, zero_reg, zero_reg);
884
885 /* d = (s1 < 0) ? s2 : s3 */
886 spe_fcgt(gen->f, d_reg, zero_reg, s1_reg);
887 spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg);
888
889 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
890 free_itemps(gen);
891 }
892 }
893
894 return true;
895 }
896
897 /**
898 * Emit floor.
899 * If negative int subtract one
900 * Convert float to signed int
901 * Convert signed int to float
902 */
903 static boolean
904 emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst)
905 {
906 int ch;
907
908 spe_comment(gen->f, -4, "FLR:");
909
910 for (ch = 0; ch < 4; ch++) {
911 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
912 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
913 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
914 int tmp_reg = get_itemp(gen);
915
916 /* If negative, subtract 1.0 */
917 spe_xor(gen->f, tmp_reg, tmp_reg, tmp_reg);
918 spe_fcgt(gen->f, d_reg, tmp_reg, s1_reg);
919 spe_selb(gen->f, tmp_reg, tmp_reg, get_const_one_reg(gen), d_reg);
920 spe_fs(gen->f, d_reg, s1_reg, tmp_reg);
921
922 /* Convert float to int */
923 spe_cflts(gen->f, d_reg, d_reg, 0);
924
925 /* Convert int to float */
926 spe_csflt(gen->f, d_reg, d_reg, 0);
927
928 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
929 free_itemps(gen);
930 }
931 }
932
933 return true;
934 }
935
936 /**
937 * Emit frac.
938 * Input - FLR(Input)
939 */
940 static boolean
941 emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst)
942 {
943 int ch;
944
945 spe_comment(gen->f, -4, "FLR:");
946
947 for (ch = 0; ch < 4; ch++) {
948 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
949 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
950 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
951 int tmp_reg = get_itemp(gen);
952
953 /* If negative, subtract 1.0 */
954 spe_xor(gen->f, tmp_reg, tmp_reg, tmp_reg);
955 spe_fcgt(gen->f, d_reg, tmp_reg, s1_reg);
956 spe_selb(gen->f, tmp_reg, tmp_reg, get_const_one_reg(gen), d_reg);
957 spe_fs(gen->f, d_reg, s1_reg, tmp_reg);
958
959 /* Convert float to int */
960 spe_cflts(gen->f, d_reg, d_reg, 0);
961
962 /* Convert int to float */
963 spe_csflt(gen->f, d_reg, d_reg, 0);
964
965 /* d = s1 - FLR(s1) */
966 spe_fs(gen->f, d_reg, s1_reg, d_reg);
967
968 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
969 free_itemps(gen);
970 }
971 }
972
973 return true;
974 }
975
976
977 /**
978 * Emit max. See emit_SGT for comments.
979 */
980 static boolean
981 emit_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst)
982 {
983 int ch;
984
985 spe_comment(gen->f, -4, "MAX:");
986
987 for (ch = 0; ch < 4; ch++) {
988 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
989 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
990 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
991 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
992
993 /* d = (s1 > s2) ? s1 : s2 */
994 spe_fcgt(gen->f, d_reg, s1_reg, s2_reg);
995 spe_selb(gen->f, d_reg, s2_reg, s1_reg, d_reg);
996
997 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
998 free_itemps(gen);
999 }
1000 }
1001
1002 return true;
1003 }
1004
1005 /**
1006 * Emit max. See emit_SGT for comments.
1007 */
1008 static boolean
1009 emit_MIN(struct codegen *gen, const struct tgsi_full_instruction *inst)
1010 {
1011 int ch;
1012
1013 spe_comment(gen->f, -4, "MIN:");
1014
1015 for (ch = 0; ch < 4; ch++) {
1016 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1017 int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1018 int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]);
1019 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1020
1021 /* d = (s2 > s1) ? s1 : s2 */
1022 spe_fcgt(gen->f, d_reg, s2_reg, s1_reg);
1023 spe_selb(gen->f, d_reg, s2_reg, s1_reg, d_reg);
1024
1025 store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]);
1026 free_itemps(gen);
1027 }
1028 }
1029
1030 return true;
1031 }
1032
1033 static boolean
1034 emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst)
1035 {
1036 const int channel = 0;
1037 const int exec_reg = get_exec_mask_reg(gen);
1038
1039 spe_comment(gen->f, -4, "IF:");
1040
1041 /* update execution mask with the predicate register */
1042 int tmp_reg = get_itemp(gen);
1043 int s1_reg = get_src_reg(gen, channel, &inst->FullSrcRegisters[0]);
1044
1045 /* tmp = (s1_reg == 0) */
1046 spe_ceqi(gen->f, tmp_reg, s1_reg, 0);
1047 /* tmp = !tmp */
1048 spe_complement(gen->f, tmp_reg, tmp_reg);
1049 /* exec_mask = exec_mask & tmp */
1050 spe_and(gen->f, exec_reg, exec_reg, tmp_reg);
1051
1052 gen->if_nesting++;
1053
1054 free_itemps(gen);
1055
1056 return true;
1057 }
1058
1059
1060 static boolean
1061 emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst)
1062 {
1063 const int exec_reg = get_exec_mask_reg(gen);
1064
1065 spe_comment(gen->f, -4, "ELSE:");
1066
1067 /* exec_mask = !exec_mask */
1068 spe_complement(gen->f, exec_reg, exec_reg);
1069
1070 return true;
1071 }
1072
1073
1074 static boolean
1075 emit_ENDIF(struct codegen *gen, const struct tgsi_full_instruction *inst)
1076 {
1077 const int exec_reg = get_exec_mask_reg(gen);
1078
1079 spe_comment(gen->f, -4, "ENDIF:");
1080
1081 /* XXX todo: pop execution mask */
1082
1083 spe_load_int(gen->f, exec_reg, ~0x0);
1084
1085 gen->if_nesting--;
1086 return true;
1087 }
1088
1089
1090 static boolean
1091 emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst,
1092 boolean ddx)
1093 {
1094 int ch;
1095
1096 spe_comment(gen->f, -4, ddx ? "DDX:" : "DDY:");
1097
1098 for (ch = 0; ch < 4; ch++) {
1099 if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) {
1100 int s_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]);
1101 int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]);
1102
1103 int t1_reg = get_itemp(gen);
1104 int t2_reg = get_itemp(gen);
1105
1106 spe_splat_word(gen->f, t1_reg, s_reg, 0); /* upper-left pixel */
1107 if (ddx) {
1108 spe_splat_word(gen->f, t2_reg, s_reg, 1); /* upper-right pixel */
1109 }
1110 else {
1111 spe_splat_word(gen->f, t2_reg, s_reg, 2); /* lower-left pixel */
1112 }
1113 spe_fs(gen->f, d_reg, t2_reg, t1_reg);
1114
1115 free_itemps(gen);
1116 }
1117 }
1118
1119 return true;
1120 }
1121
1122
1123
1124
1125 /**
1126 * Emit END instruction.
1127 * We just return from the shader function at this point.
1128 *
1129 * Note that there may be more code after this that would be
1130 * called by TGSI_OPCODE_CALL.
1131 */
1132 static boolean
1133 emit_END(struct codegen *gen)
1134 {
1135 spe_comment(gen->f, -4, "END:");
1136 /* return from function call */
1137 spe_bi(gen->f, SPE_REG_RA, 0, 0);
1138 return true;
1139 }
1140
1141
1142 /**
1143 * Emit code for the given instruction. Just a big switch stmt.
1144 */
1145 static boolean
1146 emit_instruction(struct codegen *gen,
1147 const struct tgsi_full_instruction *inst)
1148 {
1149 switch (inst->Instruction.Opcode) {
1150 case TGSI_OPCODE_MOV:
1151 return emit_MOV(gen, inst);
1152 case TGSI_OPCODE_MUL:
1153 return emit_MUL(gen, inst);
1154 case TGSI_OPCODE_ADD:
1155 return emit_ADD(gen, inst);
1156 case TGSI_OPCODE_SUB:
1157 return emit_SUB(gen, inst);
1158 case TGSI_OPCODE_MAD:
1159 return emit_MAD(gen, inst);
1160 case TGSI_OPCODE_LERP:
1161 return emit_LERP(gen, inst);
1162 case TGSI_OPCODE_DP3:
1163 return emit_DP3(gen, inst);
1164 case TGSI_OPCODE_DP4:
1165 return emit_DP4(gen, inst);
1166 case TGSI_OPCODE_DPH:
1167 return emit_DPH(gen, inst);
1168 case TGSI_OPCODE_RCP:
1169 return emit_RCP(gen, inst);
1170 case TGSI_OPCODE_RSQ:
1171 return emit_RSQ(gen, inst);
1172 case TGSI_OPCODE_ABS:
1173 return emit_ABS(gen, inst);
1174 case TGSI_OPCODE_SGT:
1175 return emit_SGT(gen, inst);
1176 case TGSI_OPCODE_SLT:
1177 return emit_SLT(gen, inst);
1178 case TGSI_OPCODE_SGE:
1179 return emit_SGE(gen, inst);
1180 case TGSI_OPCODE_SLE:
1181 return emit_SLE(gen, inst);
1182 case TGSI_OPCODE_SEQ:
1183 return emit_SEQ(gen, inst);
1184 case TGSI_OPCODE_SNE:
1185 return emit_SNE(gen, inst);
1186 case TGSI_OPCODE_CMP:
1187 return emit_CMP(gen, inst);
1188 case TGSI_OPCODE_MAX:
1189 return emit_MAX(gen, inst);
1190 case TGSI_OPCODE_MIN:
1191 return emit_MIN(gen, inst);
1192 case TGSI_OPCODE_FLR:
1193 return emit_FLR(gen, inst);
1194 case TGSI_OPCODE_FRC:
1195 return emit_FRC(gen, inst);
1196 case TGSI_OPCODE_END:
1197 return emit_END(gen);
1198
1199 case TGSI_OPCODE_IF:
1200 return emit_IF(gen, inst);
1201 case TGSI_OPCODE_ELSE:
1202 return emit_ELSE(gen, inst);
1203 case TGSI_OPCODE_ENDIF:
1204 return emit_ENDIF(gen, inst);
1205
1206 case TGSI_OPCODE_DDX:
1207 return emit_DDX_DDY(gen, inst, true);
1208 case TGSI_OPCODE_DDY:
1209 return emit_DDX_DDY(gen, inst, false);
1210
1211 /* XXX lots more cases to do... */
1212
1213 default:
1214 fprintf(stderr, "Cell: unimplemented TGSI instruction %d!\n",
1215 inst->Instruction.Opcode);
1216 return false;
1217 }
1218
1219 return true;
1220 }
1221
1222
1223
1224 /**
1225 * Emit code for a TGSI immediate value (vector of four floats).
1226 * This involves register allocation and initialization.
1227 * XXX the initialization should be done by a "prepare" stage, not
1228 * per quad execution!
1229 */
1230 static boolean
1231 emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed)
1232 {
1233 int ch;
1234
1235 assert(gen->num_imm < MAX_TEMPS);
1236
1237 spe_comment(gen->f, -4, "IMMEDIATE:");
1238
1239 for (ch = 0; ch < 4; ch++) {
1240 float val = immed->u.ImmediateFloat32[ch].Float;
1241 int reg = spe_allocate_available_register(gen->f);
1242
1243 if (reg < 0)
1244 return false;
1245
1246 /* update immediate map */
1247 gen->imm_regs[gen->num_imm][ch] = reg;
1248
1249 /* emit initializer instruction */
1250 spe_load_float(gen->f, reg, val);
1251 }
1252
1253 gen->num_imm++;
1254
1255 return true;
1256 }
1257
1258
1259
1260 /**
1261 * Emit "code" for a TGSI declaration.
1262 * We only care about TGSI TEMPORARY register declarations at this time.
1263 * For each TGSI TEMPORARY we allocate four SPE registers.
1264 */
1265 static boolean
1266 emit_declaration(struct cell_context *cell,
1267 struct codegen *gen, const struct tgsi_full_declaration *decl)
1268 {
1269 int i, ch;
1270
1271 switch (decl->Declaration.File) {
1272 case TGSI_FILE_TEMPORARY:
1273 if (cell->debug_flags & CELL_DEBUG_ASM) {
1274 printf("Declare temp reg %d .. %d\n",
1275 decl->DeclarationRange.First,
1276 decl->DeclarationRange.Last);
1277 }
1278
1279 for (i = decl->DeclarationRange.First;
1280 i <= decl->DeclarationRange.Last;
1281 i++) {
1282 assert(i < MAX_TEMPS);
1283 for (ch = 0; ch < 4; ch++) {
1284 gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f);
1285 if (gen->temp_regs[i][ch] < 0)
1286 return false; /* out of regs */
1287 }
1288
1289 /* XXX if we run out of SPE registers, we need to spill
1290 * to SPU memory. someday...
1291 */
1292
1293 if (cell->debug_flags & CELL_DEBUG_ASM) {
1294 printf(" SPE regs: %d %d %d %d\n",
1295 gen->temp_regs[i][0],
1296 gen->temp_regs[i][1],
1297 gen->temp_regs[i][2],
1298 gen->temp_regs[i][3]);
1299 }
1300 }
1301 break;
1302 default:
1303 ; /* ignore */
1304 }
1305
1306 return true;
1307 }
1308
1309
1310 /**
1311 * Translate TGSI shader code to SPE instructions. This is done when
1312 * the state tracker gives us a new shader (via pipe->create_fs_state()).
1313 *
1314 * \param cell the rendering context (in)
1315 * \param tokens the TGSI shader (in)
1316 * \param f the generated function (out)
1317 */
1318 boolean
1319 cell_gen_fragment_program(struct cell_context *cell,
1320 const struct tgsi_token *tokens,
1321 struct spe_function *f)
1322 {
1323 struct tgsi_parse_context parse;
1324 struct codegen gen;
1325
1326 memset(&gen, 0, sizeof(gen));
1327 gen.f = f;
1328
1329 /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
1330 gen.inputs_reg = 3; /* pointer to inputs array */
1331 gen.outputs_reg = 4; /* pointer to outputs array */
1332 gen.constants_reg = 5; /* pointer to constants array */
1333
1334 spe_init_func(f, SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE);
1335 spe_allocate_register(f, gen.inputs_reg);
1336 spe_allocate_register(f, gen.outputs_reg);
1337 spe_allocate_register(f, gen.constants_reg);
1338
1339 if (cell->debug_flags & CELL_DEBUG_ASM) {
1340 spe_print_code(f, true);
1341 spe_indent(f, 8);
1342 printf("Begin %s\n", __FUNCTION__);
1343 tgsi_dump(tokens, 0);
1344 }
1345
1346 tgsi_parse_init(&parse, tokens);
1347
1348 while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) {
1349 tgsi_parse_token(&parse);
1350
1351 switch (parse.FullToken.Token.Type) {
1352 case TGSI_TOKEN_TYPE_IMMEDIATE:
1353 if (!emit_immediate(&gen, &parse.FullToken.FullImmediate))
1354 gen.error = true;
1355 break;
1356
1357 case TGSI_TOKEN_TYPE_DECLARATION:
1358 if (!emit_declaration(cell, &gen, &parse.FullToken.FullDeclaration))
1359 gen.error = true;
1360 break;
1361
1362 case TGSI_TOKEN_TYPE_INSTRUCTION:
1363 if (!emit_instruction(&gen, &parse.FullToken.FullInstruction))
1364 gen.error = true;
1365 break;
1366
1367 default:
1368 assert(0);
1369 }
1370 }
1371
1372
1373 if (gen.error) {
1374 /* terminate the SPE code */
1375 return emit_END(&gen);
1376 }
1377
1378 if (cell->debug_flags & CELL_DEBUG_ASM) {
1379 printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst);
1380 printf("End %s\n", __FUNCTION__);
1381 }
1382
1383 tgsi_parse_free( &parse );
1384
1385 return !gen.error;
1386 }