Merge commit 'origin/gallium-0.1' into gallium-0.2
[mesa.git] / src / mesa / shader / slang / slang_emit.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 7.1
4 *
5 * Copyright (C) 2005-2008 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file slang_emit.c
27 * Emit program instructions (PI code) from IR trees.
28 * \author Brian Paul
29 */
30
31 /***
32 *** NOTES
33 ***
34 *** To emit GPU instructions, we basically just do an in-order traversal
35 *** of the IR tree.
36 ***/
37
38
39 #include "main/imports.h"
40 #include "main/context.h"
41 #include "main/macros.h"
42 #include "shader/program.h"
43 #include "shader/prog_instruction.h"
44 #include "shader/prog_parameter.h"
45 #include "shader/prog_print.h"
46 #include "slang_builtin.h"
47 #include "slang_emit.h"
48 #include "slang_mem.h"
49
50
/** Non-zero enables the ADD/MUL -> MAD fusion in emit_arith(). */
#define PEEPHOLE_OPTIMIZATIONS 1
/**
 * Non-zero makes storage_annotation() and instruction_annotation() build
 * real strings; when zero both simply return NULL.
 */
#define ANNOTATE 0
53
54
/**
 * State that is threaded through all of the emit functions in this file.
 */
typedef struct
{
   slang_info_log *log;              /**< destination for error messages */
   slang_var_table *vt;              /**< used to allocate/free temp registers */
   struct gl_program *prog;          /**< program currently receiving instructions */
   struct gl_program **Subroutines;  /**< array grown by new_subroutine() */
   GLuint NumSubroutines;            /**< number of entries in Subroutines[] */

   GLuint MaxInstructions;  /**< size of prog->Instructions[] buffer */

   /* code-gen options */
   GLboolean EmitHighLevelInstructions;
   GLboolean EmitCondCodes;
   GLboolean EmitComments;
   /* when set, emit_fcall() brackets each subroutine with BGNSUB/ENDSUB */
   GLboolean EmitBeginEndSub; /* XXX TEMPORARY */
} slang_emit_info;
71
72
73
74 static struct gl_program *
75 new_subroutine(slang_emit_info *emitInfo, GLuint *id)
76 {
77 GET_CURRENT_CONTEXT(ctx);
78 const GLuint n = emitInfo->NumSubroutines;
79
80 emitInfo->Subroutines = (struct gl_program **)
81 _mesa_realloc(emitInfo->Subroutines,
82 n * sizeof(struct gl_program),
83 (n + 1) * sizeof(struct gl_program));
84 emitInfo->Subroutines[n] = ctx->Driver.NewProgram(ctx, emitInfo->prog->Target, 0);
85 emitInfo->Subroutines[n]->Parameters = emitInfo->prog->Parameters;
86 emitInfo->NumSubroutines++;
87 *id = n;
88 return emitInfo->Subroutines[n];
89 }
90
91
92 /**
93 * Convert a writemask to a swizzle. Used for testing cond codes because
94 * we only want to test the cond code component(s) that was set by the
95 * previous instruction.
96 */
97 static GLuint
98 writemask_to_swizzle(GLuint writemask)
99 {
100 if (writemask == WRITEMASK_X)
101 return SWIZZLE_XXXX;
102 if (writemask == WRITEMASK_Y)
103 return SWIZZLE_YYYY;
104 if (writemask == WRITEMASK_Z)
105 return SWIZZLE_ZZZZ;
106 if (writemask == WRITEMASK_W)
107 return SWIZZLE_WWWW;
108 return SWIZZLE_XYZW; /* shouldn't be hit */
109 }
110
111
112 /**
113 * Convert a swizzle mask to a writemask.
114 * Note that the slang_ir_storage->Swizzle field can represent either a
115 * swizzle mask or a writemask, depending on how it's used. For example,
116 * when we parse "direction.yz" alone, we don't know whether .yz is a
117 * writemask or a swizzle. In this case, we encode ".yz" in store->Swizzle
118 * as a swizzle mask (.yz?? actually). Later, if direction.yz is used as
119 * an R-value, we use store->Swizzle as-is. Otherwise, if direction.yz is
120 * used as an L-value, we convert it to a writemask.
121 */
122 static GLuint
123 swizzle_to_writemask(GLuint swizzle)
124 {
125 GLuint i, writemask = 0x0;
126 for (i = 0; i < 4; i++) {
127 GLuint swz = GET_SWZ(swizzle, i);
128 if (swz <= SWIZZLE_W) {
129 writemask |= (1 << swz);
130 }
131 }
132 return writemask;
133 }
134
135
136 /**
137 * Swizzle a swizzle (function composition).
138 * That is, return swz2(swz1), or said another way: swz1.szw2
139 * Example: swizzle_swizzle(".zwxx", ".xxyw") yields ".zzwx"
140 */
141 GLuint
142 _slang_swizzle_swizzle(GLuint swz1, GLuint swz2)
143 {
144 GLuint i, swz, s[4];
145 for (i = 0; i < 4; i++) {
146 GLuint c = GET_SWZ(swz2, i);
147 if (c <= SWIZZLE_W)
148 s[i] = GET_SWZ(swz1, c);
149 else
150 s[i] = c;
151 }
152 swz = MAKE_SWIZZLE4(s[0], s[1], s[2], s[3]);
153 return swz;
154 }
155
156
157 /**
158 * Allocate storage for the given node (if it hasn't already been allocated).
159 *
160 * Typically this is temporary storage for an intermediate result (such as
161 * for a multiply or add, etc).
162 *
163 * If n->Store does not exist it will be created and will be of the size
164 * specified by defaultSize.
165 */
166 static GLboolean
167 alloc_node_storage(slang_emit_info *emitInfo, slang_ir_node *n,
168 GLint defaultSize)
169 {
170 assert(!n->Var);
171 if (!n->Store) {
172 assert(defaultSize > 0);
173 n->Store = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, defaultSize);
174 }
175
176 /* now allocate actual register(s). I.e. set n->Store->Index >= 0 */
177 if (n->Store->Index < 0) {
178 if (!_slang_alloc_temp(emitInfo->vt, n->Store)) {
179 slang_info_log_error(emitInfo->log,
180 "Ran out of registers, too many temporaries");
181 _slang_free(n->Store);
182 n->Store = NULL;
183 return GL_FALSE;
184 }
185 }
186 return GL_TRUE;
187 }
188
189
190 /**
191 * Free temporary storage, if n->Store is, in fact, temp storage.
192 * Otherwise, no-op.
193 */
194 static void
195 free_node_storage(slang_var_table *vt, slang_ir_node *n)
196 {
197 if (n->Store->File == PROGRAM_TEMPORARY &&
198 n->Store->Index >= 0 &&
199 n->Opcode != IR_SWIZZLE) {
200 if (_slang_is_temp(vt, n->Store)) {
201 _slang_free_temp(vt, n->Store);
202 n->Store->Index = -1;
203 n->Store = NULL; /* XXX this may not be needed */
204 }
205 }
206 }
207
208
209 /**
210 * Helper function to allocate a short-term temporary.
211 * Free it with _slang_free_temp().
212 */
213 static GLboolean
214 alloc_local_temp(slang_emit_info *emitInfo, slang_ir_storage *temp, GLint size)
215 {
216 assert(size >= 1);
217 assert(size <= 4);
218 _mesa_bzero(temp, sizeof(*temp));
219 temp->Size = size;
220 temp->File = PROGRAM_TEMPORARY;
221 temp->Index = -1;
222 return _slang_alloc_temp(emitInfo->vt, temp);
223 }
224
225
226 /**
227 * Remove any SWIZZLE_NIL terms from given swizzle mask.
228 * For a swizzle like .z??? generate .zzzz (replicate single component).
229 * Else, for .wx?? generate .wxzw (insert default component for the position).
230 */
231 static GLuint
232 fix_swizzle(GLuint swizzle)
233 {
234 GLuint c0 = GET_SWZ(swizzle, 0),
235 c1 = GET_SWZ(swizzle, 1),
236 c2 = GET_SWZ(swizzle, 2),
237 c3 = GET_SWZ(swizzle, 3);
238 if (c1 == SWIZZLE_NIL && c2 == SWIZZLE_NIL && c3 == SWIZZLE_NIL) {
239 /* smear first component across all positions */
240 c1 = c2 = c3 = c0;
241 }
242 else {
243 /* insert default swizzle components */
244 if (c0 == SWIZZLE_NIL)
245 c0 = SWIZZLE_X;
246 if (c1 == SWIZZLE_NIL)
247 c1 = SWIZZLE_Y;
248 if (c2 == SWIZZLE_NIL)
249 c2 = SWIZZLE_Z;
250 if (c3 == SWIZZLE_NIL)
251 c3 = SWIZZLE_W;
252 }
253 return MAKE_SWIZZLE4(c0, c1, c2, c3);
254 }
255
256
257
258 /**
259 * Convert IR storage to an instruction dst register.
260 */
261 static void
262 storage_to_dst_reg(struct prog_dst_register *dst, const slang_ir_storage *st)
263 {
264 const GLboolean relAddr = st->RelAddr;
265 const GLint size = st->Size;
266 GLint index = st->Index;
267 GLuint swizzle = st->Swizzle;
268
269 assert(index >= 0);
270 /* if this is storage relative to some parent storage, walk up the tree */
271 while (st->Parent) {
272 st = st->Parent;
273 assert(st->Index >= 0);
274 index += st->Index;
275 swizzle = _slang_swizzle_swizzle(st->Swizzle, swizzle);
276 }
277
278 assert(st->File != PROGRAM_UNDEFINED);
279 dst->File = st->File;
280
281 assert(index >= 0);
282 dst->Index = index;
283
284 assert(size >= 1);
285 assert(size <= 4);
286
287 if (swizzle != SWIZZLE_XYZW) {
288 dst->WriteMask = swizzle_to_writemask(swizzle);
289 }
290 else {
291 GLuint writemask;
292 switch (size) {
293 case 1:
294 writemask = WRITEMASK_X << GET_SWZ(st->Swizzle, 0);
295 break;
296 case 2:
297 writemask = WRITEMASK_XY;
298 break;
299 case 3:
300 writemask = WRITEMASK_XYZ;
301 break;
302 case 4:
303 writemask = WRITEMASK_XYZW;
304 break;
305 default:
306 ; /* error would have been caught above */
307 }
308 dst->WriteMask = writemask;
309 }
310
311 dst->RelAddr = relAddr;
312 }
313
314
315 /**
316 * Convert IR storage to an instruction src register.
317 */
318 static void
319 storage_to_src_reg(struct prog_src_register *src, const slang_ir_storage *st)
320 {
321 const GLboolean relAddr = st->RelAddr;
322 GLint index = st->Index;
323 GLuint swizzle = st->Swizzle;
324
325 /* if this is storage relative to some parent storage, walk up the tree */
326 assert(index >= 0);
327 while (st->Parent) {
328 st = st->Parent;
329 assert(st->Index >= 0);
330 index += st->Index;
331 swizzle = _slang_swizzle_swizzle(fix_swizzle(st->Swizzle), swizzle);
332 }
333
334 assert(st->File >= 0);
335 #if 1 /* XXX temporary */
336 if (st->File == PROGRAM_UNDEFINED) {
337 slang_ir_storage *st0 = (slang_ir_storage *) st;
338 st0->File = PROGRAM_TEMPORARY;
339 }
340 #endif
341 assert(st->File < PROGRAM_UNDEFINED);
342 src->File = st->File;
343
344 assert(index >= 0);
345 src->Index = index;
346
347 swizzle = fix_swizzle(swizzle);
348 assert(GET_SWZ(swizzle, 0) <= SWIZZLE_W);
349 assert(GET_SWZ(swizzle, 1) <= SWIZZLE_W);
350 assert(GET_SWZ(swizzle, 2) <= SWIZZLE_W);
351 assert(GET_SWZ(swizzle, 3) <= SWIZZLE_W);
352 src->Swizzle = swizzle;
353
354 src->RelAddr = relAddr;
355 }
356
357
358 /*
359 * Setup storage pointing to a scalar constant/literal.
360 */
361 static void
362 constant_to_storage(slang_emit_info *emitInfo,
363 GLfloat val,
364 slang_ir_storage *store)
365 {
366 GLuint swizzle;
367 GLint reg;
368 GLfloat value[4];
369
370 value[0] = val;
371 reg = _mesa_add_unnamed_constant(emitInfo->prog->Parameters,
372 value, 1, &swizzle);
373
374 memset(store, 0, sizeof(*store));
375 store->File = PROGRAM_CONSTANT;
376 store->Index = reg;
377 store->Swizzle = swizzle;
378 }
379
380
381 /**
382 * Add new instruction at end of given program.
383 * \param prog the program to append instruction onto
384 * \param opcode opcode for the new instruction
385 * \return pointer to the new instruction
386 */
387 static struct prog_instruction *
388 new_instruction(slang_emit_info *emitInfo, gl_inst_opcode opcode)
389 {
390 struct gl_program *prog = emitInfo->prog;
391 struct prog_instruction *inst;
392
393 #if 0
394 /* print prev inst */
395 if (prog->NumInstructions > 0) {
396 _mesa_print_instruction(prog->Instructions + prog->NumInstructions - 1);
397 }
398 #endif
399 assert(prog->NumInstructions <= emitInfo->MaxInstructions);
400
401 if (prog->NumInstructions == emitInfo->MaxInstructions) {
402 /* grow the instruction buffer */
403 emitInfo->MaxInstructions += 20;
404 prog->Instructions =
405 _mesa_realloc_instructions(prog->Instructions,
406 prog->NumInstructions,
407 emitInfo->MaxInstructions);
408 }
409
410 inst = prog->Instructions + prog->NumInstructions;
411 prog->NumInstructions++;
412 _mesa_init_instructions(inst, 1);
413 inst->Opcode = opcode;
414 inst->BranchTarget = -1; /* invalid */
415 /*
416 printf("New inst %d: %p %s\n", prog->NumInstructions-1,(void*)inst,
417 _mesa_opcode_string(inst->Opcode));
418 */
419 return inst;
420 }
421
422
423 static struct prog_instruction *
424 emit_arl_load(slang_emit_info *emitInfo,
425 enum register_file file, GLint index, GLuint swizzle)
426 {
427 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_ARL);
428 inst->SrcReg[0].File = file;
429 inst->SrcReg[0].Index = index;
430 inst->SrcReg[0].Swizzle = swizzle;
431 inst->DstReg.File = PROGRAM_ADDRESS;
432 inst->DstReg.Index = 0;
433 inst->DstReg.WriteMask = WRITEMASK_X;
434 return inst;
435 }
436
437
438 /**
439 * Emit a new instruction with given opcode, operands.
440 * At this point the instruction may have multiple indirect register
441 * loads/stores. We convert those into ARL loads and address-relative
442 * operands. See comments inside.
443 * At some point in the future we could directly emit indirectly addressed
444 * registers in Mesa GPU instructions.
445 */
446 static struct prog_instruction *
447 emit_instruction(slang_emit_info *emitInfo,
448 gl_inst_opcode opcode,
449 const slang_ir_storage *dst,
450 const slang_ir_storage *src0,
451 const slang_ir_storage *src1,
452 const slang_ir_storage *src2)
453 {
454 struct prog_instruction *inst;
455 GLuint numIndirect = 0;
456 const slang_ir_storage *src[3];
457 slang_ir_storage newSrc[3], newDst;
458 GLuint i;
459 GLboolean isTemp[3];
460
461 isTemp[0] = isTemp[1] = isTemp[2] = GL_FALSE;
462
463 src[0] = src0;
464 src[1] = src1;
465 src[2] = src2;
466
467 /* count up how many operands are indirect loads */
468 for (i = 0; i < 3; i++) {
469 if (src[i] && src[i]->IsIndirect)
470 numIndirect++;
471 }
472 if (dst && dst->IsIndirect)
473 numIndirect++;
474
475 /* Take special steps for indirect register loads.
476 * If we had multiple address registers this would be simpler.
477 * For example, this GLSL code:
478 * x[i] = y[j] + z[k];
479 * would translate into something like:
480 * ARL ADDR.x, i;
481 * ARL ADDR.y, j;
482 * ARL ADDR.z, k;
483 * ADD TEMP[ADDR.x+5], TEMP[ADDR.y+9], TEMP[ADDR.z+4];
484 * But since we currently only have one address register we have to do this:
485 * ARL ADDR.x, i;
486 * MOV t1, TEMP[ADDR.x+9];
487 * ARL ADDR.x, j;
488 * MOV t2, TEMP[ADDR.x+4];
489 * ARL ADDR.x, k;
490 * ADD TEMP[ADDR.x+5], t1, t2;
491 * The code here figures this out...
492 */
493 if (numIndirect > 0) {
494 for (i = 0; i < 3; i++) {
495 if (src[i] && src[i]->IsIndirect) {
496 /* load the ARL register with the indirect register */
497 emit_arl_load(emitInfo,
498 src[i]->IndirectFile,
499 src[i]->IndirectIndex,
500 src[i]->IndirectSwizzle);
501
502 if (numIndirect > 1) {
503 /* Need to load src[i] into a temporary register */
504 slang_ir_storage srcRelAddr;
505 alloc_local_temp(emitInfo, &newSrc[i], src[i]->Size);
506 isTemp[i] = GL_TRUE;
507
508 /* set RelAddr flag on src register */
509 srcRelAddr = *src[i];
510 srcRelAddr.RelAddr = GL_TRUE;
511 srcRelAddr.IsIndirect = GL_FALSE; /* not really needed */
512
513 /* MOV newSrc, srcRelAddr; */
514 inst = emit_instruction(emitInfo,
515 OPCODE_MOV,
516 &newSrc[i],
517 &srcRelAddr,
518 NULL,
519 NULL);
520
521 src[i] = &newSrc[i];
522 }
523 else {
524 /* just rewrite the src[i] storage to be ARL-relative */
525 newSrc[i] = *src[i];
526 newSrc[i].RelAddr = GL_TRUE;
527 newSrc[i].IsIndirect = GL_FALSE; /* not really needed */
528 src[i] = &newSrc[i];
529 }
530 }
531 }
532 }
533
534 /* Take special steps for indirect dest register write */
535 if (dst && dst->IsIndirect) {
536 /* load the ARL register with the indirect register */
537 emit_arl_load(emitInfo,
538 dst->IndirectFile,
539 dst->IndirectIndex,
540 dst->IndirectSwizzle);
541 newDst = *dst;
542 newDst.RelAddr = GL_TRUE;
543 newDst.IsIndirect = GL_FALSE;
544 dst = &newDst;
545 }
546
547 /* OK, emit the instruction and its dst, src regs */
548 inst = new_instruction(emitInfo, opcode);
549 if (!inst)
550 return NULL;
551
552 if (dst)
553 storage_to_dst_reg(&inst->DstReg, dst);
554
555 for (i = 0; i < 3; i++) {
556 if (src[i])
557 storage_to_src_reg(&inst->SrcReg[i], src[i]);
558 }
559
560 /* Free any temp registers that we allocated above */
561 for (i = 0; i < 3; i++) {
562 if (isTemp[i])
563 _slang_free_temp(emitInfo->vt, &newSrc[i]);
564 }
565
566 return inst;
567 }
568
569
570
571 /**
572 * Put a comment on the given instruction.
573 */
574 static void
575 inst_comment(struct prog_instruction *inst, const char *comment)
576 {
577 if (inst)
578 inst->Comment = _mesa_strdup(comment);
579 }
580
581
582
583 /**
584 * Return pointer to last instruction in program.
585 */
586 static struct prog_instruction *
587 prev_instruction(slang_emit_info *emitInfo)
588 {
589 struct gl_program *prog = emitInfo->prog;
590 if (prog->NumInstructions == 0)
591 return NULL;
592 else
593 return prog->Instructions + prog->NumInstructions - 1;
594 }
595
596
597 static struct prog_instruction *
598 emit(slang_emit_info *emitInfo, slang_ir_node *n);
599
600
601 /**
602 * Return an annotation string for given node's storage.
603 */
604 static char *
605 storage_annotation(const slang_ir_node *n, const struct gl_program *prog)
606 {
607 #if ANNOTATE
608 const slang_ir_storage *st = n->Store;
609 static char s[100] = "";
610
611 if (!st)
612 return _mesa_strdup("");
613
614 switch (st->File) {
615 case PROGRAM_CONSTANT:
616 if (st->Index >= 0) {
617 const GLfloat *val = prog->Parameters->ParameterValues[st->Index];
618 if (st->Swizzle == SWIZZLE_NOOP)
619 sprintf(s, "{%g, %g, %g, %g}", val[0], val[1], val[2], val[3]);
620 else {
621 sprintf(s, "%g", val[GET_SWZ(st->Swizzle, 0)]);
622 }
623 }
624 break;
625 case PROGRAM_TEMPORARY:
626 if (n->Var)
627 sprintf(s, "%s", (char *) n->Var->a_name);
628 else
629 sprintf(s, "t[%d]", st->Index);
630 break;
631 case PROGRAM_STATE_VAR:
632 case PROGRAM_UNIFORM:
633 sprintf(s, "%s", prog->Parameters->Parameters[st->Index].Name);
634 break;
635 case PROGRAM_VARYING:
636 sprintf(s, "%s", prog->Varying->Parameters[st->Index].Name);
637 break;
638 case PROGRAM_INPUT:
639 sprintf(s, "input[%d]", st->Index);
640 break;
641 case PROGRAM_OUTPUT:
642 sprintf(s, "output[%d]", st->Index);
643 break;
644 default:
645 s[0] = 0;
646 }
647 return _mesa_strdup(s);
648 #else
649 return NULL;
650 #endif
651 }
652
653
654 /**
655 * Return an annotation string for an instruction.
656 */
657 static char *
658 instruction_annotation(gl_inst_opcode opcode, char *dstAnnot,
659 char *srcAnnot0, char *srcAnnot1, char *srcAnnot2)
660 {
661 #if ANNOTATE
662 const char *operator;
663 char *s;
664 int len = 50;
665
666 if (dstAnnot)
667 len += strlen(dstAnnot);
668 else
669 dstAnnot = _mesa_strdup("");
670
671 if (srcAnnot0)
672 len += strlen(srcAnnot0);
673 else
674 srcAnnot0 = _mesa_strdup("");
675
676 if (srcAnnot1)
677 len += strlen(srcAnnot1);
678 else
679 srcAnnot1 = _mesa_strdup("");
680
681 if (srcAnnot2)
682 len += strlen(srcAnnot2);
683 else
684 srcAnnot2 = _mesa_strdup("");
685
686 switch (opcode) {
687 case OPCODE_ADD:
688 operator = "+";
689 break;
690 case OPCODE_SUB:
691 operator = "-";
692 break;
693 case OPCODE_MUL:
694 operator = "*";
695 break;
696 case OPCODE_DP2:
697 operator = "DP2";
698 break;
699 case OPCODE_DP3:
700 operator = "DP3";
701 break;
702 case OPCODE_DP4:
703 operator = "DP4";
704 break;
705 case OPCODE_XPD:
706 operator = "XPD";
707 break;
708 case OPCODE_RSQ:
709 operator = "RSQ";
710 break;
711 case OPCODE_SGT:
712 operator = ">";
713 break;
714 default:
715 operator = ",";
716 }
717
718 s = (char *) malloc(len);
719 sprintf(s, "%s = %s %s %s %s", dstAnnot,
720 srcAnnot0, operator, srcAnnot1, srcAnnot2);
721 assert(_mesa_strlen(s) < len);
722
723 free(dstAnnot);
724 free(srcAnnot0);
725 free(srcAnnot1);
726 free(srcAnnot2);
727
728 return s;
729 #else
730 return NULL;
731 #endif
732 }
733
734
735 /**
736 * Emit an instruction that's just a comment.
737 */
738 static struct prog_instruction *
739 emit_comment(slang_emit_info *emitInfo, const char *comment)
740 {
741 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_NOP);
742 inst_comment(inst, comment);
743 return inst;
744 }
745
746
747 /**
748 * Generate code for a simple arithmetic instruction.
749 * Either 1, 2 or 3 operands.
750 */
751 static struct prog_instruction *
752 emit_arith(slang_emit_info *emitInfo, slang_ir_node *n)
753 {
754 const slang_ir_info *info = _slang_ir_info(n->Opcode);
755 struct prog_instruction *inst;
756 GLuint i;
757
758 assert(info);
759 assert(info->InstOpcode != OPCODE_NOP);
760
761 #if PEEPHOLE_OPTIMIZATIONS
762 /* Look for MAD opportunity */
763 if (info->NumParams == 2 &&
764 n->Opcode == IR_ADD && n->Children[0]->Opcode == IR_MUL) {
765 /* found pattern IR_ADD(IR_MUL(A, B), C) */
766 emit(emitInfo, n->Children[0]->Children[0]); /* A */
767 emit(emitInfo, n->Children[0]->Children[1]); /* B */
768 emit(emitInfo, n->Children[1]); /* C */
769 alloc_node_storage(emitInfo, n, -1); /* dest */
770
771 inst = emit_instruction(emitInfo,
772 OPCODE_MAD,
773 n->Store,
774 n->Children[0]->Children[0]->Store,
775 n->Children[0]->Children[1]->Store,
776 n->Children[1]->Store);
777
778 free_node_storage(emitInfo->vt, n->Children[0]->Children[0]);
779 free_node_storage(emitInfo->vt, n->Children[0]->Children[1]);
780 free_node_storage(emitInfo->vt, n->Children[1]);
781 return inst;
782 }
783
784 if (info->NumParams == 2 &&
785 n->Opcode == IR_ADD && n->Children[1]->Opcode == IR_MUL) {
786 /* found pattern IR_ADD(A, IR_MUL(B, C)) */
787 emit(emitInfo, n->Children[0]); /* A */
788 emit(emitInfo, n->Children[1]->Children[0]); /* B */
789 emit(emitInfo, n->Children[1]->Children[1]); /* C */
790 alloc_node_storage(emitInfo, n, -1); /* dest */
791
792 inst = emit_instruction(emitInfo,
793 OPCODE_MAD,
794 n->Store,
795 n->Children[1]->Children[0]->Store,
796 n->Children[1]->Children[1]->Store,
797 n->Children[0]->Store);
798
799 free_node_storage(emitInfo->vt, n->Children[1]->Children[0]);
800 free_node_storage(emitInfo->vt, n->Children[1]->Children[1]);
801 free_node_storage(emitInfo->vt, n->Children[0]);
802 return inst;
803 }
804 #endif
805
806 /* gen code for children, may involve temp allocation */
807 for (i = 0; i < info->NumParams; i++) {
808 emit(emitInfo, n->Children[i]);
809 if (!n->Children[i] || !n->Children[i]->Store) {
810 /* error recovery */
811 return NULL;
812 }
813 }
814
815 /* result storage */
816 alloc_node_storage(emitInfo, n, -1);
817
818 inst = emit_instruction(emitInfo,
819 info->InstOpcode,
820 n->Store, /* dest */
821 (info->NumParams > 0 ? n->Children[0]->Store : NULL),
822 (info->NumParams > 1 ? n->Children[1]->Store : NULL),
823 (info->NumParams > 2 ? n->Children[2]->Store : NULL)
824 );
825
826 /* free temps */
827 for (i = 0; i < info->NumParams; i++)
828 free_node_storage(emitInfo->vt, n->Children[i]);
829
830 return inst;
831 }
832
833
834 /**
835 * Emit code for == and != operators. These could normally be handled
836 * by emit_arith() except we need to be able to handle structure comparisons.
837 */
838 static struct prog_instruction *
839 emit_compare(slang_emit_info *emitInfo, slang_ir_node *n)
840 {
841 struct prog_instruction *inst = NULL;
842 GLint size;
843
844 assert(n->Opcode == IR_EQUAL || n->Opcode == IR_NOTEQUAL);
845
846 /* gen code for children */
847 emit(emitInfo, n->Children[0]);
848 emit(emitInfo, n->Children[1]);
849
850 if (n->Children[0]->Store->Size != n->Children[1]->Store->Size) {
851 slang_info_log_error(emitInfo->log, "invalid operands to == or !=");
852 return NULL;
853 }
854
855 /* final result is 1 bool */
856 if (!alloc_node_storage(emitInfo, n, 1))
857 return NULL;
858
859 size = n->Children[0]->Store->Size;
860
861 if (size == 1) {
862 gl_inst_opcode opcode = n->Opcode == IR_EQUAL ? OPCODE_SEQ : OPCODE_SNE;
863 inst = emit_instruction(emitInfo,
864 opcode,
865 n->Store, /* dest */
866 n->Children[0]->Store,
867 n->Children[1]->Store,
868 NULL);
869 }
870 else if (size <= 4) {
871 /* compare two vectors.
872 * Unfortunately, there's no instruction to compare vectors and
873 * return a scalar result. Do it with some compare and dot product
874 * instructions...
875 */
876 GLuint swizzle;
877 gl_inst_opcode dotOp;
878 slang_ir_storage tempStore;
879
880 if (!alloc_local_temp(emitInfo, &tempStore, 4)) {
881 return NULL;
882 /* out of temps */
883 }
884
885 if (size == 4) {
886 dotOp = OPCODE_DP4;
887 swizzle = SWIZZLE_XYZW;
888 }
889 else if (size == 3) {
890 dotOp = OPCODE_DP3;
891 swizzle = SWIZZLE_XYZW;
892 }
893 else {
894 assert(size == 2);
895 dotOp = OPCODE_DP3; /* XXX use OPCODE_DP2 eventually */
896 swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y);
897 }
898
899 /* Compute inequality (temp = (A != B)) */
900 inst = emit_instruction(emitInfo,
901 OPCODE_SNE,
902 &tempStore,
903 n->Children[0]->Store,
904 n->Children[1]->Store,
905 NULL);
906 inst_comment(inst, "Compare values");
907
908 /* Compute val = DOT(temp, temp) (reduction) */
909 inst = emit_instruction(emitInfo,
910 dotOp,
911 n->Store,
912 &tempStore,
913 &tempStore,
914 NULL);
915 inst->SrcReg[0].Swizzle = inst->SrcReg[1].Swizzle = swizzle; /*override*/
916 inst_comment(inst, "Reduce vec to bool");
917
918 _slang_free_temp(emitInfo->vt, &tempStore); /* free temp */
919
920 if (n->Opcode == IR_EQUAL) {
921 /* compute val = !val.x with SEQ val, val, 0; */
922 slang_ir_storage zero;
923 constant_to_storage(emitInfo, 0.0, &zero);
924 inst = emit_instruction(emitInfo,
925 OPCODE_SEQ,
926 n->Store, /* dest */
927 n->Store,
928 &zero,
929 NULL);
930 inst_comment(inst, "Invert true/false");
931 }
932 }
933 else {
934 /* size > 4, struct or array compare.
935 * XXX this won't work reliably for structs with padding!!
936 */
937 GLint i, num = (n->Children[0]->Store->Size + 3) / 4;
938 slang_ir_storage accTemp, sneTemp;
939
940 if (!alloc_local_temp(emitInfo, &accTemp, 4))
941 return NULL;
942
943 if (!alloc_local_temp(emitInfo, &sneTemp, 4))
944 return NULL;
945
946 for (i = 0; i < num; i++) {
947 slang_ir_storage srcStore0 = *n->Children[0]->Store;
948 slang_ir_storage srcStore1 = *n->Children[1]->Store;
949 srcStore0.Index += i;
950 srcStore1.Index += i;
951
952 if (i == 0) {
953 /* SNE accTemp, left[i], right[i] */
954 inst = emit_instruction(emitInfo, OPCODE_SNE,
955 &accTemp, /* dest */
956 &srcStore0,
957 &srcStore1,
958 NULL);
959 inst_comment(inst, "Begin struct/array comparison");
960 }
961 else {
962 /* SNE sneTemp, left[i], right[i] */
963 inst = emit_instruction(emitInfo, OPCODE_SNE,
964 &sneTemp, /* dest */
965 &srcStore0,
966 &srcStore1,
967 NULL);
968 /* ADD accTemp, accTemp, sneTemp; # like logical-OR */
969 inst = emit_instruction(emitInfo, OPCODE_ADD,
970 &accTemp, /* dest */
971 &accTemp,
972 &sneTemp,
973 NULL);
974 }
975 }
976
977 /* compute accTemp.x || accTemp.y || accTemp.z || accTemp.w with DOT4 */
978 inst = emit_instruction(emitInfo, OPCODE_DP4,
979 n->Store,
980 &accTemp,
981 &accTemp,
982 NULL);
983 inst_comment(inst, "End struct/array comparison");
984
985 if (n->Opcode == IR_EQUAL) {
986 /* compute tmp.x = !tmp.x via tmp.x = (tmp.x == 0) */
987 slang_ir_storage zero;
988 constant_to_storage(emitInfo, 0.0, &zero);
989 inst = emit_instruction(emitInfo, OPCODE_SEQ,
990 n->Store, /* dest */
991 n->Store,
992 &zero,
993 NULL);
994 inst_comment(inst, "Invert true/false");
995 }
996
997 _slang_free_temp(emitInfo->vt, &accTemp);
998 _slang_free_temp(emitInfo->vt, &sneTemp);
999 }
1000
1001 /* free temps */
1002 free_node_storage(emitInfo->vt, n->Children[0]);
1003 free_node_storage(emitInfo->vt, n->Children[1]);
1004
1005 return inst;
1006 }
1007
1008
1009
1010 /**
1011 * Generate code for an IR_CLAMP instruction.
1012 */
1013 static struct prog_instruction *
1014 emit_clamp(slang_emit_info *emitInfo, slang_ir_node *n)
1015 {
1016 struct prog_instruction *inst;
1017 slang_ir_node tmpNode;
1018
1019 assert(n->Opcode == IR_CLAMP);
1020 /* ch[0] = value
1021 * ch[1] = min limit
1022 * ch[2] = max limit
1023 */
1024
1025 inst = emit(emitInfo, n->Children[0]);
1026
1027 /* If lower limit == 0.0 and upper limit == 1.0,
1028 * set prev instruction's SaturateMode field to SATURATE_ZERO_ONE.
1029 * Else,
1030 * emit OPCODE_MIN, OPCODE_MAX sequence.
1031 */
1032 #if 0
1033 /* XXX this isn't quite finished yet */
1034 if (n->Children[1]->Opcode == IR_FLOAT &&
1035 n->Children[1]->Value[0] == 0.0 &&
1036 n->Children[1]->Value[1] == 0.0 &&
1037 n->Children[1]->Value[2] == 0.0 &&
1038 n->Children[1]->Value[3] == 0.0 &&
1039 n->Children[2]->Opcode == IR_FLOAT &&
1040 n->Children[2]->Value[0] == 1.0 &&
1041 n->Children[2]->Value[1] == 1.0 &&
1042 n->Children[2]->Value[2] == 1.0 &&
1043 n->Children[2]->Value[3] == 1.0) {
1044 if (!inst) {
1045 inst = prev_instruction(prog);
1046 }
1047 if (inst && inst->Opcode != OPCODE_NOP) {
1048 /* and prev instruction's DstReg matches n->Children[0]->Store */
1049 inst->SaturateMode = SATURATE_ZERO_ONE;
1050 n->Store = n->Children[0]->Store;
1051 return inst;
1052 }
1053 }
1054 #endif
1055
1056 if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size))
1057 return NULL;
1058
1059 emit(emitInfo, n->Children[1]);
1060 emit(emitInfo, n->Children[2]);
1061
1062 /* Some GPUs don't allow reading from output registers. So if the
1063 * dest for this clamp() is an output reg, we can't use that reg for
1064 * the intermediate result. Use a temp register instead.
1065 */
1066 _mesa_bzero(&tmpNode, sizeof(tmpNode));
1067 alloc_node_storage(emitInfo, &tmpNode, n->Store->Size);
1068
1069 /* tmp = max(ch[0], ch[1]) */
1070 inst = emit_instruction(emitInfo, OPCODE_MAX,
1071 tmpNode.Store, /* dest */
1072 n->Children[0]->Store,
1073 n->Children[1]->Store,
1074 NULL);
1075
1076 /* n->dest = min(tmp, ch[2]) */
1077 inst = emit_instruction(emitInfo, OPCODE_MIN,
1078 n->Store, /* dest */
1079 tmpNode.Store,
1080 n->Children[2]->Store,
1081 NULL);
1082
1083 free_node_storage(emitInfo->vt, &tmpNode);
1084
1085 return inst;
1086 }
1087
1088
1089 static struct prog_instruction *
1090 emit_negation(slang_emit_info *emitInfo, slang_ir_node *n)
1091 {
1092 /* Implement as MOV dst, -src; */
1093 /* XXX we could look at the previous instruction and in some circumstances
1094 * modify it to accomplish the negation.
1095 */
1096 struct prog_instruction *inst;
1097
1098 emit(emitInfo, n->Children[0]);
1099
1100 if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size))
1101 return NULL;
1102
1103 inst = emit_instruction(emitInfo,
1104 OPCODE_MOV,
1105 n->Store, /* dest */
1106 n->Children[0]->Store,
1107 NULL,
1108 NULL);
1109 inst->SrcReg[0].NegateBase = NEGATE_XYZW;
1110 return inst;
1111 }
1112
1113
1114 static struct prog_instruction *
1115 emit_label(slang_emit_info *emitInfo, const slang_ir_node *n)
1116 {
1117 assert(n->Label);
1118 #if 0
1119 /* XXX this fails in loop tail code - investigate someday */
1120 assert(_slang_label_get_location(n->Label) < 0);
1121 _slang_label_set_location(n->Label, emitInfo->prog->NumInstructions,
1122 emitInfo->prog);
1123 #else
1124 if (_slang_label_get_location(n->Label) < 0)
1125 _slang_label_set_location(n->Label, emitInfo->prog->NumInstructions,
1126 emitInfo->prog);
1127 #endif
1128 return NULL;
1129 }
1130
1131
1132 /**
1133 * Emit code for a function call.
1134 * Note that for each time a function is called, we emit the function's
1135 * body code again because the set of available registers may be different.
1136 */
1137 static struct prog_instruction *
1138 emit_fcall(slang_emit_info *emitInfo, slang_ir_node *n)
1139 {
1140 struct gl_program *progSave;
1141 struct prog_instruction *inst;
1142 GLuint subroutineId;
1143 GLuint maxInstSave;
1144
1145 assert(n->Opcode == IR_CALL);
1146 assert(n->Label);
1147
1148 /* save/push cur program */
1149 maxInstSave = emitInfo->MaxInstructions;
1150 progSave = emitInfo->prog;
1151
1152 emitInfo->prog = new_subroutine(emitInfo, &subroutineId);
1153 emitInfo->MaxInstructions = emitInfo->prog->NumInstructions;
1154
1155 _slang_label_set_location(n->Label, emitInfo->prog->NumInstructions,
1156 emitInfo->prog);
1157
1158 if (emitInfo->EmitBeginEndSub) {
1159 /* BGNSUB isn't a real instruction.
1160 * We require a label (i.e. "foobar:") though, if we're going to
1161 * print the program in the NV format. The BNGSUB instruction is
1162 * really just a NOP to attach the label to.
1163 */
1164 inst = new_instruction(emitInfo, OPCODE_BGNSUB);
1165 inst_comment(inst, n->Label->Name);
1166 }
1167
1168 /* body of function: */
1169 emit(emitInfo, n->Children[0]);
1170 n->Store = n->Children[0]->Store;
1171
1172 /* add RET instruction now, if needed */
1173 inst = prev_instruction(emitInfo);
1174 if (inst && inst->Opcode != OPCODE_RET) {
1175 inst = new_instruction(emitInfo, OPCODE_RET);
1176 }
1177
1178 if (emitInfo->EmitBeginEndSub) {
1179 inst = new_instruction(emitInfo, OPCODE_ENDSUB);
1180 inst_comment(inst, n->Label->Name);
1181 }
1182
1183 /* pop/restore cur program */
1184 emitInfo->prog = progSave;
1185 emitInfo->MaxInstructions = maxInstSave;
1186
1187 /* emit the function call */
1188 inst = new_instruction(emitInfo, OPCODE_CAL);
1189 /* The branch target is just the subroutine number (changed later) */
1190 inst->BranchTarget = subroutineId;
1191 inst_comment(inst, n->Label->Name);
1192 assert(inst->BranchTarget >= 0);
1193
1194 return inst;
1195 }
1196
1197
1198 /**
1199 * Emit code for a 'return' statement.
1200 */
1201 static struct prog_instruction *
1202 emit_return(slang_emit_info *emitInfo, slang_ir_node *n)
1203 {
1204 struct prog_instruction *inst;
1205 assert(n);
1206 assert(n->Opcode == IR_RETURN);
1207 assert(n->Label);
1208 inst = new_instruction(emitInfo, OPCODE_RET);
1209 inst->DstReg.CondMask = COND_TR; /* always return */
1210 return inst;
1211 }
1212
1213
1214 static struct prog_instruction *
1215 emit_kill(slang_emit_info *emitInfo)
1216 {
1217 struct gl_fragment_program *fp;
1218 struct prog_instruction *inst;
1219 /* NV-KILL - discard fragment depending on condition code.
1220 * Note that ARB-KILL depends on sign of vector operand.
1221 */
1222 inst = new_instruction(emitInfo, OPCODE_KIL_NV);
1223 inst->DstReg.CondMask = COND_TR; /* always kill */
1224
1225 assert(emitInfo->prog->Target == GL_FRAGMENT_PROGRAM_ARB);
1226 fp = (struct gl_fragment_program *) emitInfo->prog;
1227 fp->UsesKill = GL_TRUE;
1228
1229 return inst;
1230 }
1231
1232
1233 static struct prog_instruction *
1234 emit_tex(slang_emit_info *emitInfo, slang_ir_node *n)
1235 {
1236 struct prog_instruction *inst;
1237 gl_inst_opcode opcode;
1238
1239 if (n->Opcode == IR_TEX) {
1240 opcode = OPCODE_TEX;
1241 }
1242 else if (n->Opcode == IR_TEXB) {
1243 opcode = OPCODE_TXB;
1244 }
1245 else {
1246 assert(n->Opcode == IR_TEXP);
1247 opcode = OPCODE_TXP;
1248 }
1249
1250 /* emit code for the texcoord operand */
1251 (void) emit(emitInfo, n->Children[1]);
1252
1253 /* alloc storage for result of texture fetch */
1254 if (!alloc_node_storage(emitInfo, n, 4))
1255 return NULL;
1256
1257 /* emit TEX instruction; Child[1] is the texcoord */
1258 inst = emit_instruction(emitInfo,
1259 opcode,
1260 n->Store,
1261 n->Children[1]->Store,
1262 NULL,
1263 NULL);
1264
1265 /* Child[0] is the sampler (a uniform which'll indicate the texture unit) */
1266 assert(n->Children[0]->Store);
1267 /* Store->Index is the sampler index */
1268 assert(n->Children[0]->Store->Index >= 0);
1269 /* Store->Size is the texture target */
1270 assert(n->Children[0]->Store->Size >= TEXTURE_1D_INDEX);
1271 assert(n->Children[0]->Store->Size <= TEXTURE_RECT_INDEX);
1272
1273 inst->TexSrcTarget = n->Children[0]->Store->Size;
1274 inst->TexSrcUnit = n->Children[0]->Store->Index; /* i.e. uniform's index */
1275
1276 return inst;
1277 }
1278
1279
1280 /**
1281 * Assignment/copy
1282 */
1283 static struct prog_instruction *
1284 emit_copy(slang_emit_info *emitInfo, slang_ir_node *n)
1285 {
1286 struct prog_instruction *inst;
1287
1288 assert(n->Opcode == IR_COPY);
1289
1290 /* lhs */
1291 emit(emitInfo, n->Children[0]);
1292 if (!n->Children[0]->Store || n->Children[0]->Store->Index < 0) {
1293 /* an error should have been already recorded */
1294 return NULL;
1295 }
1296
1297 /* rhs */
1298 assert(n->Children[1]);
1299 inst = emit(emitInfo, n->Children[1]);
1300
1301 if (!n->Children[1]->Store || n->Children[1]->Store->Index < 0) {
1302 if (!emitInfo->log->text) {
1303 slang_info_log_error(emitInfo->log, "invalid assignment");
1304 }
1305 return NULL;
1306 }
1307
1308 assert(n->Children[1]->Store->Index >= 0);
1309
1310 /*assert(n->Children[0]->Store->Size == n->Children[1]->Store->Size);*/
1311
1312 n->Store = n->Children[0]->Store;
1313
1314 if (n->Store->File == PROGRAM_SAMPLER) {
1315 /* no code generated for sampler assignments,
1316 * just copy the sampler index at compile time.
1317 */
1318 n->Store->Index = n->Children[1]->Store->Index;
1319 return NULL;
1320 }
1321
1322 #if PEEPHOLE_OPTIMIZATIONS
1323 if (inst &&
1324 _slang_is_temp(emitInfo->vt, n->Children[1]->Store) &&
1325 (inst->DstReg.File == n->Children[1]->Store->File) &&
1326 (inst->DstReg.Index == n->Children[1]->Store->Index) &&
1327 !n->Children[0]->Store->IsIndirect &&
1328 n->Children[0]->Store->Size <= 4) {
1329 /* Peephole optimization:
1330 * The Right-Hand-Side has its results in a temporary place.
1331 * Modify the RHS (and the prev instruction) to store its results
1332 * in the destination specified by n->Children[0].
1333 * Then, this MOVE is a no-op.
1334 * Ex:
1335 * MUL tmp, x, y;
1336 * MOV a, tmp;
1337 * becomes:
1338 * MUL a, x, y;
1339 */
1340 if (n->Children[1]->Opcode != IR_SWIZZLE)
1341 _slang_free_temp(emitInfo->vt, n->Children[1]->Store);
1342 *n->Children[1]->Store = *n->Children[0]->Store;
1343
1344 /* fixup the previous instruction (which stored the RHS result) */
1345 assert(n->Children[0]->Store->Index >= 0);
1346
1347 storage_to_dst_reg(&inst->DstReg, n->Children[0]->Store);
1348 return inst;
1349 }
1350 else
1351 #endif
1352 {
1353 if (n->Children[0]->Store->Size > 4) {
1354 /* move matrix/struct etc (block of registers) */
1355 slang_ir_storage dstStore = *n->Children[0]->Store;
1356 slang_ir_storage srcStore = *n->Children[1]->Store;
1357 GLint size = srcStore.Size;
1358 ASSERT(n->Children[1]->Store->Swizzle == SWIZZLE_NOOP);
1359 dstStore.Size = 4;
1360 srcStore.Size = 4;
1361 while (size >= 4) {
1362 inst = emit_instruction(emitInfo, OPCODE_MOV,
1363 &dstStore,
1364 &srcStore,
1365 NULL,
1366 NULL);
1367 inst_comment(inst, "IR_COPY block");
1368 srcStore.Index++;
1369 dstStore.Index++;
1370 size -= 4;
1371 }
1372 }
1373 else {
1374 /* single register move */
1375 char *srcAnnot, *dstAnnot;
1376 assert(n->Children[0]->Store->Index >= 0);
1377 inst = emit_instruction(emitInfo, OPCODE_MOV,
1378 n->Children[0]->Store, /* dest */
1379 n->Children[1]->Store,
1380 NULL,
1381 NULL);
1382 dstAnnot = storage_annotation(n->Children[0], emitInfo->prog);
1383 srcAnnot = storage_annotation(n->Children[1], emitInfo->prog);
1384 inst->Comment = instruction_annotation(inst->Opcode, dstAnnot,
1385 srcAnnot, NULL, NULL);
1386 }
1387 free_node_storage(emitInfo->vt, n->Children[1]);
1388 return inst;
1389 }
1390 }
1391
1392
1393 /**
1394 * An IR_COND node wraps a boolean expression which is used by an
1395 * IF or WHILE test. This is where we'll set condition codes, if needed.
1396 */
1397 static struct prog_instruction *
1398 emit_cond(slang_emit_info *emitInfo, slang_ir_node *n)
1399 {
1400 struct prog_instruction *inst;
1401
1402 assert(n->Opcode == IR_COND);
1403
1404 if (!n->Children[0])
1405 return NULL;
1406
1407 /* emit code for the expression */
1408 inst = emit(emitInfo, n->Children[0]);
1409
1410 if (!n->Children[0]->Store) {
1411 /* error recovery */
1412 return NULL;
1413 }
1414
1415 assert(n->Children[0]->Store);
1416 /*assert(n->Children[0]->Store->Size == 1);*/
1417
1418 if (emitInfo->EmitCondCodes) {
1419 if (inst &&
1420 n->Children[0]->Store &&
1421 inst->DstReg.File == n->Children[0]->Store->File &&
1422 inst->DstReg.Index == n->Children[0]->Store->Index) {
1423 /* The previous instruction wrote to the register who's value
1424 * we're testing. Just fix that instruction so that the
1425 * condition codes are computed.
1426 */
1427 inst->CondUpdate = GL_TRUE;
1428 n->Store = n->Children[0]->Store;
1429 return inst;
1430 }
1431 else {
1432 /* This'll happen for things like "if (i) ..." where no code
1433 * is normally generated for the expression "i".
1434 * Generate a move instruction just to set condition codes.
1435 */
1436 if (!alloc_node_storage(emitInfo, n, 1))
1437 return NULL;
1438 inst = emit_instruction(emitInfo, OPCODE_MOV,
1439 n->Store, /* dest */
1440 n->Children[0]->Store,
1441 NULL,
1442 NULL);
1443 inst->CondUpdate = GL_TRUE;
1444 inst_comment(inst, "COND expr");
1445 _slang_free_temp(emitInfo->vt, n->Store);
1446 return inst;
1447 }
1448 }
1449 else {
1450 /* No-op: the boolean result of the expression is in a regular reg */
1451 n->Store = n->Children[0]->Store;
1452 return inst;
1453 }
1454 }
1455
1456
1457 /**
1458 * Logical-NOT
1459 */
1460 static struct prog_instruction *
1461 emit_not(slang_emit_info *emitInfo, slang_ir_node *n)
1462 {
1463 static const struct {
1464 gl_inst_opcode op, opNot;
1465 } operators[] = {
1466 { OPCODE_SLT, OPCODE_SGE },
1467 { OPCODE_SLE, OPCODE_SGT },
1468 { OPCODE_SGT, OPCODE_SLE },
1469 { OPCODE_SGE, OPCODE_SLT },
1470 { OPCODE_SEQ, OPCODE_SNE },
1471 { OPCODE_SNE, OPCODE_SEQ },
1472 { 0, 0 }
1473 };
1474 struct prog_instruction *inst;
1475 slang_ir_storage zero;
1476 GLuint i;
1477
1478 /* child expr */
1479 inst = emit(emitInfo, n->Children[0]);
1480
1481 #if PEEPHOLE_OPTIMIZATIONS
1482 if (inst) {
1483 /* if the prev instruction was a comparison instruction, invert it */
1484 for (i = 0; operators[i].op; i++) {
1485 if (inst->Opcode == operators[i].op) {
1486 inst->Opcode = operators[i].opNot;
1487 n->Store = n->Children[0]->Store;
1488 return inst;
1489 }
1490 }
1491 }
1492 #endif
1493
1494 /* else, invert using SEQ (v = v == 0) */
1495 if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size))
1496 return NULL;
1497
1498 constant_to_storage(emitInfo, 0.0, &zero);
1499 inst = emit_instruction(emitInfo,
1500 OPCODE_SEQ,
1501 n->Store,
1502 n->Children[0]->Store,
1503 &zero,
1504 NULL);
1505 inst_comment(inst, "NOT");
1506
1507 free_node_storage(emitInfo->vt, n->Children[0]);
1508
1509 return inst;
1510 }
1511
1512
1513 static struct prog_instruction *
1514 emit_if(slang_emit_info *emitInfo, slang_ir_node *n)
1515 {
1516 struct gl_program *prog = emitInfo->prog;
1517 GLuint ifInstLoc, elseInstLoc = 0;
1518 GLuint condWritemask = 0;
1519
1520 /* emit condition expression code */
1521 {
1522 struct prog_instruction *inst;
1523 inst = emit(emitInfo, n->Children[0]);
1524 if (emitInfo->EmitCondCodes) {
1525 if (!inst) {
1526 /* error recovery */
1527 return NULL;
1528 }
1529 condWritemask = inst->DstReg.WriteMask;
1530 }
1531 }
1532
1533 if (!n->Children[0]->Store)
1534 return NULL;
1535
1536 #if 0
1537 assert(n->Children[0]->Store->Size == 1); /* a bool! */
1538 #endif
1539
1540 ifInstLoc = prog->NumInstructions;
1541 if (emitInfo->EmitHighLevelInstructions) {
1542 if (emitInfo->EmitCondCodes) {
1543 /* IF condcode THEN ... */
1544 struct prog_instruction *ifInst;
1545 ifInst = new_instruction(emitInfo, OPCODE_IF);
1546 ifInst->DstReg.CondMask = COND_NE; /* if cond is non-zero */
1547 /* only test the cond code (1 of 4) that was updated by the
1548 * previous instruction.
1549 */
1550 ifInst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1551 }
1552 else {
1553 /* IF src[0] THEN ... */
1554 emit_instruction(emitInfo, OPCODE_IF,
1555 NULL, /* dst */
1556 n->Children[0]->Store, /* op0 */
1557 NULL,
1558 NULL);
1559 }
1560 }
1561 else {
1562 /* conditional jump to else, or endif */
1563 struct prog_instruction *ifInst = new_instruction(emitInfo, OPCODE_BRA);
1564 ifInst->DstReg.CondMask = COND_EQ; /* BRA if cond is zero */
1565 inst_comment(ifInst, "if zero");
1566 ifInst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1567 }
1568
1569 /* if body */
1570 emit(emitInfo, n->Children[1]);
1571
1572 if (n->Children[2]) {
1573 /* have else body */
1574 elseInstLoc = prog->NumInstructions;
1575 if (emitInfo->EmitHighLevelInstructions) {
1576 (void) new_instruction(emitInfo, OPCODE_ELSE);
1577 }
1578 else {
1579 /* jump to endif instruction */
1580 struct prog_instruction *inst;
1581 inst = new_instruction(emitInfo, OPCODE_BRA);
1582 inst_comment(inst, "else");
1583 inst->DstReg.CondMask = COND_TR; /* always branch */
1584 }
1585 prog->Instructions[ifInstLoc].BranchTarget = prog->NumInstructions;
1586 emit(emitInfo, n->Children[2]);
1587 }
1588 else {
1589 /* no else body */
1590 prog->Instructions[ifInstLoc].BranchTarget = prog->NumInstructions;
1591 }
1592
1593 if (emitInfo->EmitHighLevelInstructions) {
1594 (void) new_instruction(emitInfo, OPCODE_ENDIF);
1595 }
1596
1597 if (n->Children[2]) {
1598 prog->Instructions[elseInstLoc].BranchTarget = prog->NumInstructions;
1599 }
1600 return NULL;
1601 }
1602
1603
1604 static struct prog_instruction *
1605 emit_loop(slang_emit_info *emitInfo, slang_ir_node *n)
1606 {
1607 struct gl_program *prog = emitInfo->prog;
1608 struct prog_instruction *endInst;
1609 GLuint beginInstLoc, tailInstLoc, endInstLoc;
1610 slang_ir_node *ir;
1611
1612 /* emit OPCODE_BGNLOOP */
1613 beginInstLoc = prog->NumInstructions;
1614 if (emitInfo->EmitHighLevelInstructions) {
1615 (void) new_instruction(emitInfo, OPCODE_BGNLOOP);
1616 }
1617
1618 /* body */
1619 emit(emitInfo, n->Children[0]);
1620
1621 /* tail */
1622 tailInstLoc = prog->NumInstructions;
1623 if (n->Children[1]) {
1624 if (emitInfo->EmitComments)
1625 emit_comment(emitInfo, "Loop tail code:");
1626 emit(emitInfo, n->Children[1]);
1627 }
1628
1629 endInstLoc = prog->NumInstructions;
1630 if (emitInfo->EmitHighLevelInstructions) {
1631 /* emit OPCODE_ENDLOOP */
1632 endInst = new_instruction(emitInfo, OPCODE_ENDLOOP);
1633 }
1634 else {
1635 /* emit unconditional BRA-nch */
1636 endInst = new_instruction(emitInfo, OPCODE_BRA);
1637 endInst->DstReg.CondMask = COND_TR; /* always true */
1638 }
1639 /* ENDLOOP's BranchTarget points to the BGNLOOP inst */
1640 endInst->BranchTarget = beginInstLoc;
1641
1642 if (emitInfo->EmitHighLevelInstructions) {
1643 /* BGNLOOP's BranchTarget points to the ENDLOOP inst */
1644 prog->Instructions[beginInstLoc].BranchTarget = prog->NumInstructions -1;
1645 }
1646
1647 /* Done emitting loop code. Now walk over the loop's linked list of
1648 * BREAK and CONT nodes, filling in their BranchTarget fields (which
1649 * will point to the ENDLOOP+1 or BGNLOOP instructions, respectively).
1650 */
1651 for (ir = n->List; ir; ir = ir->List) {
1652 struct prog_instruction *inst = prog->Instructions + ir->InstLocation;
1653 assert(inst->BranchTarget < 0);
1654 if (ir->Opcode == IR_BREAK ||
1655 ir->Opcode == IR_BREAK_IF_TRUE) {
1656 assert(inst->Opcode == OPCODE_BRK ||
1657 inst->Opcode == OPCODE_BRA);
1658 /* go to instruction after end of loop */
1659 inst->BranchTarget = endInstLoc + 1;
1660 }
1661 else {
1662 assert(ir->Opcode == IR_CONT ||
1663 ir->Opcode == IR_CONT_IF_TRUE);
1664 assert(inst->Opcode == OPCODE_CONT ||
1665 inst->Opcode == OPCODE_BRA);
1666 /* go to instruction at tail of loop */
1667 inst->BranchTarget = endInstLoc;
1668 }
1669 }
1670 return NULL;
1671 }
1672
1673
1674 /**
1675 * Unconditional "continue" or "break" statement.
1676 * Either OPCODE_CONT, OPCODE_BRK or OPCODE_BRA will be emitted.
1677 */
1678 static struct prog_instruction *
1679 emit_cont_break(slang_emit_info *emitInfo, slang_ir_node *n)
1680 {
1681 gl_inst_opcode opcode;
1682 struct prog_instruction *inst;
1683
1684 if (n->Opcode == IR_CONT) {
1685 /* we need to execute the loop's tail code before doing CONT */
1686 assert(n->Parent);
1687 assert(n->Parent->Opcode == IR_LOOP);
1688 if (n->Parent->Children[1]) {
1689 /* emit tail code */
1690 if (emitInfo->EmitComments) {
1691 emit_comment(emitInfo, "continue - tail code:");
1692 }
1693 emit(emitInfo, n->Parent->Children[1]);
1694 }
1695 }
1696
1697 /* opcode selection */
1698 if (emitInfo->EmitHighLevelInstructions) {
1699 opcode = (n->Opcode == IR_CONT) ? OPCODE_CONT : OPCODE_BRK;
1700 }
1701 else {
1702 opcode = OPCODE_BRA;
1703 }
1704 n->InstLocation = emitInfo->prog->NumInstructions;
1705 inst = new_instruction(emitInfo, opcode);
1706 inst->DstReg.CondMask = COND_TR; /* always true */
1707 return inst;
1708 }
1709
1710
1711 /**
1712 * Conditional "continue" or "break" statement.
1713 * Either OPCODE_CONT, OPCODE_BRK or OPCODE_BRA will be emitted.
1714 */
1715 static struct prog_instruction *
1716 emit_cont_break_if_true(slang_emit_info *emitInfo, slang_ir_node *n)
1717 {
1718 struct prog_instruction *inst;
1719
1720 assert(n->Opcode == IR_CONT_IF_TRUE ||
1721 n->Opcode == IR_BREAK_IF_TRUE);
1722
1723 /* evaluate condition expr, setting cond codes */
1724 inst = emit(emitInfo, n->Children[0]);
1725 if (emitInfo->EmitCondCodes) {
1726 assert(inst);
1727 inst->CondUpdate = GL_TRUE;
1728 }
1729
1730 n->InstLocation = emitInfo->prog->NumInstructions;
1731
1732 /* opcode selection */
1733 if (emitInfo->EmitHighLevelInstructions) {
1734 const gl_inst_opcode opcode
1735 = (n->Opcode == IR_CONT_IF_TRUE) ? OPCODE_CONT : OPCODE_BRK;
1736 if (emitInfo->EmitCondCodes) {
1737 /* Get the writemask from the previous instruction which set
1738 * the condcodes. Use that writemask as the CondSwizzle.
1739 */
1740 const GLuint condWritemask = inst->DstReg.WriteMask;
1741 inst = new_instruction(emitInfo, opcode);
1742 inst->DstReg.CondMask = COND_NE;
1743 inst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1744 return inst;
1745 }
1746 else {
1747 /* IF reg
1748 * BRK/CONT;
1749 * ENDIF
1750 */
1751 GLint ifInstLoc;
1752 ifInstLoc = emitInfo->prog->NumInstructions;
1753 inst = emit_instruction(emitInfo, OPCODE_IF,
1754 NULL, /* dest */
1755 n->Children[0]->Store,
1756 NULL,
1757 NULL);
1758 n->InstLocation = emitInfo->prog->NumInstructions;
1759
1760 inst = new_instruction(emitInfo, opcode);
1761 inst = new_instruction(emitInfo, OPCODE_ENDIF);
1762
1763 emitInfo->prog->Instructions[ifInstLoc].BranchTarget
1764 = emitInfo->prog->NumInstructions;
1765 return inst;
1766 }
1767 }
1768 else {
1769 const GLuint condWritemask = inst->DstReg.WriteMask;
1770 assert(emitInfo->EmitCondCodes);
1771 inst = new_instruction(emitInfo, OPCODE_BRA);
1772 inst->DstReg.CondMask = COND_NE;
1773 inst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1774 return inst;
1775 }
1776 }
1777
1778
1779 static struct prog_instruction *
1780 emit_swizzle(slang_emit_info *emitInfo, slang_ir_node *n)
1781 {
1782 struct prog_instruction *inst;
1783
1784 inst = emit(emitInfo, n->Children[0]);
1785
1786 #if 0
1787 assert(n->Store->Parent);
1788 /* Apply this node's swizzle to parent's storage */
1789 GLuint swizzle = n->Store->Swizzle;
1790 _slang_copy_ir_storage(n->Store, n->Store->Parent);
1791 n->Store->Swizzle = _slang_swizzle_swizzle(n->Store->Swizzle, swizzle);
1792 assert(!n->Store->Parent);
1793 #endif
1794 return inst;
1795 }
1796
1797
1798 /**
1799 * Dereference array element: element == array[index]
1800 * This basically involves emitting code for computing the array index
1801 * and updating the node/element's storage info.
1802 */
1803 static struct prog_instruction *
1804 emit_array_element(slang_emit_info *emitInfo, slang_ir_node *n)
1805 {
1806 slang_ir_storage *arrayStore, *indexStore;
1807 const int elemSize = n->Store->Size; /* number of floats */
1808 const GLint elemSizeVec = (elemSize + 3) / 4; /* number of vec4 */
1809 struct prog_instruction *inst;
1810
1811 assert(n->Opcode == IR_ELEMENT);
1812 assert(elemSize > 0);
1813
1814 /* special case for built-in state variables, like light state */
1815 {
1816 slang_ir_storage *root = n->Store;
1817 assert(!root->Parent);
1818 while (root->Parent)
1819 root = root->Parent;
1820
1821 if (root->File == PROGRAM_STATE_VAR) {
1822 GLint index = _slang_alloc_statevar(n, emitInfo->prog->Parameters);
1823 assert(n->Store->Index == index);
1824 return NULL;
1825 }
1826 }
1827
1828 /* do codegen for array itself */
1829 emit(emitInfo, n->Children[0]);
1830 arrayStore = n->Children[0]->Store;
1831
1832 /* The initial array element storage is the array's storage,
1833 * then modified below.
1834 */
1835 _slang_copy_ir_storage(n->Store, arrayStore);
1836
1837
1838 if (n->Children[1]->Opcode == IR_FLOAT) {
1839 /* Constant array index */
1840 const GLint element = (GLint) n->Children[1]->Value[0];
1841
1842 /* this element's storage is the array's storage, plus constant offset */
1843 n->Store->Index += elemSizeVec * element;
1844 }
1845 else {
1846 /* Variable array index */
1847
1848 /* do codegen for array index expression */
1849 emit(emitInfo, n->Children[1]);
1850 indexStore = n->Children[1]->Store;
1851
1852 if (indexStore->IsIndirect) {
1853 /* need to put the array index into a temporary since we can't
1854 * directly support a[b[i]] constructs.
1855 */
1856
1857
1858 /*indexStore = tempstore();*/
1859 }
1860
1861
1862 if (elemSize > 4) {
1863 /* need to multiply array index by array element size */
1864 struct prog_instruction *inst;
1865 slang_ir_storage *indexTemp;
1866 slang_ir_storage elemSizeStore;
1867
1868 /* allocate 1 float indexTemp */
1869 indexTemp = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, 1);
1870 _slang_alloc_temp(emitInfo->vt, indexTemp);
1871
1872 /* allocate a constant containing the element size */
1873 constant_to_storage(emitInfo, (float) elemSizeVec, &elemSizeStore);
1874
1875 /* multiply array index by element size */
1876 inst = emit_instruction(emitInfo,
1877 OPCODE_MUL,
1878 indexTemp, /* dest */
1879 indexStore, /* the index */
1880 &elemSizeStore,
1881 NULL);
1882
1883 indexStore = indexTemp;
1884 }
1885
1886 if (arrayStore->IsIndirect) {
1887 /* ex: in a[i][j], a[i] (the arrayStore) is indirect */
1888 /* Need to add indexStore to arrayStore->Indirect store */
1889 slang_ir_storage indirectArray;
1890 slang_ir_storage *indexTemp;
1891
1892 _slang_init_ir_storage(&indirectArray,
1893 arrayStore->IndirectFile,
1894 arrayStore->IndirectIndex,
1895 1,
1896 arrayStore->IndirectSwizzle);
1897
1898 /* allocate 1 float indexTemp */
1899 indexTemp = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, 1);
1900 _slang_alloc_temp(emitInfo->vt, indexTemp);
1901
1902 inst = emit_instruction(emitInfo,
1903 OPCODE_ADD,
1904 indexTemp, /* dest */
1905 indexStore, /* the index */
1906 &indirectArray, /* indirect array base */
1907 NULL);
1908
1909 indexStore = indexTemp;
1910 }
1911
1912 /* update the array element storage info */
1913 n->Store->IsIndirect = GL_TRUE;
1914 n->Store->IndirectFile = indexStore->File;
1915 n->Store->IndirectIndex = indexStore->Index;
1916 n->Store->IndirectSwizzle = indexStore->Swizzle;
1917 }
1918
1919 n->Store->Size = elemSize;
1920
1921 return NULL; /* no instruction */
1922 }
1923
1924
1925 /**
1926 * Resolve storage for accessing a structure field.
1927 */
1928 static struct prog_instruction *
1929 emit_struct_field(slang_emit_info *emitInfo, slang_ir_node *n)
1930 {
1931 slang_ir_storage *root = n->Store;
1932 GLint fieldOffset, fieldSize;
1933
1934 assert(n->Opcode == IR_FIELD);
1935
1936 assert(!root->Parent);
1937 while (root->Parent)
1938 root = root->Parent;
1939
1940 /* If this is the field of a state var, allocate constant/uniform
1941 * storage for it now if we haven't already.
1942 * Note that we allocate storage (uniform/constant slots) for state
1943 * variables here rather than at declaration time so we only allocate
1944 * space for the ones that we actually use!
1945 */
1946 if (root->File == PROGRAM_STATE_VAR) {
1947 root->Index = _slang_alloc_statevar(n, emitInfo->prog->Parameters);
1948 if (root->Index < 0) {
1949 slang_info_log_error(emitInfo->log, "Error parsing state variable");
1950 return NULL;
1951 }
1952 return NULL;
1953 }
1954 else {
1955 /* do codegen for struct */
1956 emit(emitInfo, n->Children[0]);
1957 assert(n->Children[0]->Store->Index >= 0);
1958 }
1959
1960 fieldOffset = n->Store->Index;
1961 fieldSize = n->Store->Size;
1962
1963 _slang_copy_ir_storage(n->Store, n->Children[0]->Store);
1964
1965 n->Store->Index = n->Children[0]->Store->Index + fieldOffset / 4;
1966 /* XXX test this:
1967 n->Store->Index += fieldOffset / 4;
1968 */
1969
1970 switch (fieldSize) {
1971 case 1:
1972 {
1973 GLint swz = fieldOffset % 4;
1974 n->Store->Swizzle = MAKE_SWIZZLE4(swz, swz, swz, swz);
1975 }
1976 break;
1977 case 2:
1978 n->Store->Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
1979 SWIZZLE_NIL, SWIZZLE_NIL);
1980 break;
1981 case 3:
1982 n->Store->Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
1983 SWIZZLE_Z, SWIZZLE_NIL);
1984 break;
1985 default:
1986 n->Store->Swizzle = SWIZZLE_XYZW;
1987 }
1988
1989 assert(n->Store->Index >= 0);
1990
1991 return NULL; /* no instruction */
1992 }
1993
1994
1995 /**
1996 * Emit code for a variable declaration.
1997 * This usually doesn't result in any code generation, but just
1998 * memory allocation.
1999 */
2000 static struct prog_instruction *
2001 emit_var_decl(slang_emit_info *emitInfo, slang_ir_node *n)
2002 {
2003 assert(n->Store);
2004 assert(n->Store->File != PROGRAM_UNDEFINED);
2005 assert(n->Store->Size > 0);
2006 /*assert(n->Store->Index < 0);*/
2007
2008 if (!n->Var || n->Var->isTemp) {
2009 /* a nameless/temporary variable, will be freed after first use */
2010 /*NEW*/
2011 if (n->Store->Index < 0 && !_slang_alloc_temp(emitInfo->vt, n->Store)) {
2012 slang_info_log_error(emitInfo->log,
2013 "Ran out of registers, too many temporaries");
2014 return NULL;
2015 }
2016 }
2017 else {
2018 /* a regular variable */
2019 _slang_add_variable(emitInfo->vt, n->Var);
2020 if (!_slang_alloc_var(emitInfo->vt, n->Store)) {
2021 slang_info_log_error(emitInfo->log,
2022 "Ran out of registers, too many variables");
2023 return NULL;
2024 }
2025 /*
2026 printf("IR_VAR_DECL %s %d store %p\n",
2027 (char*) n->Var->a_name, n->Store->Index, (void*) n->Store);
2028 */
2029 assert(n->Var->store == n->Store);
2030 }
2031 if (emitInfo->EmitComments) {
2032 /* emit NOP with comment describing the variable's storage location */
2033 char s[1000];
2034 sprintf(s, "TEMP[%d]%s = variable %s (size %d)",
2035 n->Store->Index,
2036 _mesa_swizzle_string(n->Store->Swizzle, 0, GL_FALSE),
2037 (n->Var ? (char *) n->Var->a_name : "anonymous"),
2038 n->Store->Size);
2039 emit_comment(emitInfo, s);
2040 }
2041 return NULL;
2042 }
2043
2044
2045 /**
2046 * Emit code for a reference to a variable.
2047 * Actually, no code is generated but we may do some memory allocation.
2048 * In particular, state vars (uniforms) are allocated on an as-needed basis.
2049 */
2050 static struct prog_instruction *
2051 emit_var_ref(slang_emit_info *emitInfo, slang_ir_node *n)
2052 {
2053 assert(n->Store);
2054 assert(n->Store->File != PROGRAM_UNDEFINED);
2055
2056 if (n->Store->File == PROGRAM_STATE_VAR && n->Store->Index < 0) {
2057 n->Store->Index = _slang_alloc_statevar(n, emitInfo->prog->Parameters);
2058 }
2059 else if (n->Store->File == PROGRAM_UNIFORM) {
2060 /* mark var as used */
2061 _mesa_use_uniform(emitInfo->prog->Parameters, (char *) n->Var->a_name);
2062 }
2063
2064 if (n->Store->Index < 0) {
2065 /* probably ran out of registers */
2066 return NULL;
2067 }
2068 assert(n->Store->Size > 0);
2069
2070 return NULL;
2071 }
2072
2073
2074 static struct prog_instruction *
2075 emit(slang_emit_info *emitInfo, slang_ir_node *n)
2076 {
2077 struct prog_instruction *inst;
2078 if (!n)
2079 return NULL;
2080
2081 if (emitInfo->log->error_flag) {
2082 return NULL;
2083 }
2084
2085 switch (n->Opcode) {
2086 case IR_SEQ:
2087 /* sequence of two sub-trees */
2088 assert(n->Children[0]);
2089 assert(n->Children[1]);
2090 emit(emitInfo, n->Children[0]);
2091 if (emitInfo->log->error_flag)
2092 return NULL;
2093 inst = emit(emitInfo, n->Children[1]);
2094 #if 0
2095 assert(!n->Store);
2096 #endif
2097 n->Store = n->Children[1]->Store;
2098 return inst;
2099
2100 case IR_SCOPE:
2101 /* new variable scope */
2102 _slang_push_var_table(emitInfo->vt);
2103 inst = emit(emitInfo, n->Children[0]);
2104 _slang_pop_var_table(emitInfo->vt);
2105 return inst;
2106
2107 case IR_VAR_DECL:
2108 /* Variable declaration - allocate a register for it */
2109 inst = emit_var_decl(emitInfo, n);
2110 return inst;
2111
2112 case IR_VAR:
2113 /* Reference to a variable
2114 * Storage should have already been resolved/allocated.
2115 */
2116 return emit_var_ref(emitInfo, n);
2117
2118 case IR_ELEMENT:
2119 return emit_array_element(emitInfo, n);
2120 case IR_FIELD:
2121 return emit_struct_field(emitInfo, n);
2122 case IR_SWIZZLE:
2123 return emit_swizzle(emitInfo, n);
2124
2125 /* Simple arithmetic */
2126 /* unary */
2127 case IR_MOVE:
2128 case IR_RSQ:
2129 case IR_RCP:
2130 case IR_FLOOR:
2131 case IR_FRAC:
2132 case IR_F_TO_I:
2133 case IR_I_TO_F:
2134 case IR_ABS:
2135 case IR_SIN:
2136 case IR_COS:
2137 case IR_DDX:
2138 case IR_DDY:
2139 case IR_EXP:
2140 case IR_EXP2:
2141 case IR_LOG2:
2142 case IR_NOISE1:
2143 case IR_NOISE2:
2144 case IR_NOISE3:
2145 case IR_NOISE4:
2146 case IR_NRM4:
2147 case IR_NRM3:
2148 /* binary */
2149 case IR_ADD:
2150 case IR_SUB:
2151 case IR_MUL:
2152 case IR_DOT4:
2153 case IR_DOT3:
2154 case IR_DOT2:
2155 case IR_CROSS:
2156 case IR_MIN:
2157 case IR_MAX:
2158 case IR_SEQUAL:
2159 case IR_SNEQUAL:
2160 case IR_SGE:
2161 case IR_SGT:
2162 case IR_SLE:
2163 case IR_SLT:
2164 case IR_POW:
2165 /* trinary operators */
2166 case IR_LRP:
2167 return emit_arith(emitInfo, n);
2168
2169 case IR_EQUAL:
2170 case IR_NOTEQUAL:
2171 return emit_compare(emitInfo, n);
2172
2173 case IR_CLAMP:
2174 return emit_clamp(emitInfo, n);
2175 case IR_TEX:
2176 case IR_TEXB:
2177 case IR_TEXP:
2178 return emit_tex(emitInfo, n);
2179 case IR_NEG:
2180 return emit_negation(emitInfo, n);
2181 case IR_FLOAT:
2182 /* find storage location for this float constant */
2183 n->Store->Index = _mesa_add_unnamed_constant(emitInfo->prog->Parameters,
2184 n->Value,
2185 n->Store->Size,
2186 &n->Store->Swizzle);
2187 if (n->Store->Index < 0) {
2188 slang_info_log_error(emitInfo->log, "Ran out of space for constants");
2189 return NULL;
2190 }
2191 return NULL;
2192
2193 case IR_COPY:
2194 return emit_copy(emitInfo, n);
2195
2196 case IR_COND:
2197 return emit_cond(emitInfo, n);
2198
2199 case IR_NOT:
2200 return emit_not(emitInfo, n);
2201
2202 case IR_LABEL:
2203 return emit_label(emitInfo, n);
2204
2205 case IR_KILL:
2206 return emit_kill(emitInfo);
2207
2208 case IR_CALL:
2209 /* new variable scope for subroutines/function calls */
2210 _slang_push_var_table(emitInfo->vt);
2211 inst = emit_fcall(emitInfo, n);
2212 _slang_pop_var_table(emitInfo->vt);
2213 return inst;
2214
2215 case IR_IF:
2216 return emit_if(emitInfo, n);
2217
2218 case IR_LOOP:
2219 return emit_loop(emitInfo, n);
2220 case IR_BREAK_IF_TRUE:
2221 case IR_CONT_IF_TRUE:
2222 return emit_cont_break_if_true(emitInfo, n);
2223 case IR_BREAK:
2224 /* fall-through */
2225 case IR_CONT:
2226 return emit_cont_break(emitInfo, n);
2227
2228 case IR_BEGIN_SUB:
2229 return new_instruction(emitInfo, OPCODE_BGNSUB);
2230 case IR_END_SUB:
2231 return new_instruction(emitInfo, OPCODE_ENDSUB);
2232 case IR_RETURN:
2233 return emit_return(emitInfo, n);
2234
2235 case IR_NOP:
2236 return NULL;
2237
2238 default:
2239 _mesa_problem(NULL, "Unexpected IR opcode in emit()\n");
2240 }
2241 return NULL;
2242 }
2243
2244
2245 /**
2246 * After code generation, any subroutines will be in separate program
2247 * objects. This function appends all the subroutines onto the main
2248 * program and resolves the linking of all the branch/call instructions.
2249 * XXX this logic should really be part of the linking process...
2250 */
2251 static void
2252 _slang_resolve_subroutines(slang_emit_info *emitInfo)
2253 {
2254 GET_CURRENT_CONTEXT(ctx);
2255 struct gl_program *mainP = emitInfo->prog;
2256 GLuint *subroutineLoc, i, total;
2257
2258 subroutineLoc
2259 = (GLuint *) _mesa_malloc(emitInfo->NumSubroutines * sizeof(GLuint));
2260
2261 /* total number of instructions */
2262 total = mainP->NumInstructions;
2263 for (i = 0; i < emitInfo->NumSubroutines; i++) {
2264 subroutineLoc[i] = total;
2265 total += emitInfo->Subroutines[i]->NumInstructions;
2266 }
2267
2268 /* adjust BranchTargets within the functions */
2269 for (i = 0; i < emitInfo->NumSubroutines; i++) {
2270 struct gl_program *sub = emitInfo->Subroutines[i];
2271 GLuint j;
2272 for (j = 0; j < sub->NumInstructions; j++) {
2273 struct prog_instruction *inst = sub->Instructions + j;
2274 if (inst->Opcode != OPCODE_CAL && inst->BranchTarget >= 0) {
2275 inst->BranchTarget += subroutineLoc[i];
2276 }
2277 }
2278 }
2279
2280 /* append subroutines' instructions after main's instructions */
2281 mainP->Instructions = _mesa_realloc_instructions(mainP->Instructions,
2282 mainP->NumInstructions,
2283 total);
2284 mainP->NumInstructions = total;
2285 for (i = 0; i < emitInfo->NumSubroutines; i++) {
2286 struct gl_program *sub = emitInfo->Subroutines[i];
2287 _mesa_copy_instructions(mainP->Instructions + subroutineLoc[i],
2288 sub->Instructions,
2289 sub->NumInstructions);
2290 /* delete subroutine code */
2291 sub->Parameters = NULL; /* prevent double-free */
2292 _mesa_reference_program(ctx, &emitInfo->Subroutines[i], NULL);
2293 }
2294
2295 /* free subroutine list */
2296 if (emitInfo->Subroutines) {
2297 _mesa_free(emitInfo->Subroutines);
2298 emitInfo->Subroutines = NULL;
2299 }
2300 emitInfo->NumSubroutines = 0;
2301
2302 /* Examine CAL instructions.
2303 * At this point, the BranchTarget field of the CAL instruction is
2304 * the number/id of the subroutine to call (an index into the
2305 * emitInfo->Subroutines list).
2306 * Translate that into an actual instruction location now.
2307 */
2308 for (i = 0; i < mainP->NumInstructions; i++) {
2309 struct prog_instruction *inst = mainP->Instructions + i;
2310 if (inst->Opcode == OPCODE_CAL) {
2311 const GLuint f = inst->BranchTarget;
2312 inst->BranchTarget = subroutineLoc[f];
2313 }
2314 }
2315
2316 _mesa_free(subroutineLoc);
2317 }
2318
2319
2320
2321
2322 GLboolean
2323 _slang_emit_code(slang_ir_node *n, slang_var_table *vt,
2324 struct gl_program *prog, GLboolean withEnd,
2325 slang_info_log *log)
2326 {
2327 GET_CURRENT_CONTEXT(ctx);
2328 GLboolean success;
2329 slang_emit_info emitInfo;
2330 GLuint maxUniforms;
2331
2332 emitInfo.log = log;
2333 emitInfo.vt = vt;
2334 emitInfo.prog = prog;
2335 emitInfo.Subroutines = NULL;
2336 emitInfo.NumSubroutines = 0;
2337 emitInfo.MaxInstructions = prog->NumInstructions;
2338
2339 emitInfo.EmitHighLevelInstructions = ctx->Shader.EmitHighLevelInstructions;
2340 emitInfo.EmitCondCodes = ctx->Shader.EmitCondCodes;
2341 emitInfo.EmitComments = ctx->Shader.EmitComments;
2342 emitInfo.EmitBeginEndSub = GL_TRUE;
2343
2344 if (!emitInfo.EmitCondCodes) {
2345 emitInfo.EmitHighLevelInstructions = GL_TRUE;
2346 }
2347
2348 /* Check uniform/constant limits */
2349 if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
2350 maxUniforms = ctx->Const.FragmentProgram.MaxUniformComponents / 4;
2351 }
2352 else {
2353 assert(prog->Target == GL_VERTEX_PROGRAM_ARB);
2354 maxUniforms = ctx->Const.VertexProgram.MaxUniformComponents / 4;
2355 }
2356 if (prog->Parameters->NumParameters > maxUniforms) {
2357 slang_info_log_error(log, "Constant/uniform register limit exceeded");
2358 return GL_FALSE;
2359 }
2360
2361 (void) emit(&emitInfo, n);
2362
2363 /* finish up by adding the END opcode to program */
2364 if (withEnd) {
2365 struct prog_instruction *inst;
2366 inst = new_instruction(&emitInfo, OPCODE_END);
2367 }
2368
2369 _slang_resolve_subroutines(&emitInfo);
2370
2371 success = GL_TRUE;
2372
2373 #if 0
2374 printf("*********** End emit code (%u inst):\n", prog->NumInstructions);
2375 _mesa_print_program(prog);
2376 _mesa_print_program_parameters(ctx,prog);
2377 #endif
2378
2379 return success;
2380 }