mesa: fix another "out of samplers" problem
[mesa.git] / src / mesa / shader / slang / slang_emit.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2005-2008 Brian Paul All Rights Reserved.
5 * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file slang_emit.c
27 * Emit program instructions (PI code) from IR trees.
28 * \author Brian Paul
29 */
30
31 /***
32 *** NOTES
33 ***
34 *** To emit GPU instructions, we basically just do an in-order traversal
35 *** of the IR tree.
36 ***/
37
38
39 #include "main/imports.h"
40 #include "main/context.h"
41 #include "main/macros.h"
42 #include "shader/program.h"
43 #include "shader/prog_instruction.h"
44 #include "shader/prog_parameter.h"
45 #include "shader/prog_print.h"
46 #include "slang_builtin.h"
47 #include "slang_emit.h"
48 #include "slang_mem.h"
49
50
51 #define PEEPHOLE_OPTIMIZATIONS 1
52 #define ANNOTATE 0
53
54
/**
 * Bookkeeping shared by all of the emit_*() functions while instructions
 * are generated for one program.
 */
typedef struct
{
   slang_info_log *log;   /**< where code-gen errors are reported */
   slang_var_table *vt;   /**< table used to allocate/free temp registers */
   struct gl_program *prog;   /**< program currently receiving instructions */
   struct gl_program **Subroutines;   /**< array of subroutine programs */
   GLuint NumSubroutines;   /**< number of entries in Subroutines[] */

   GLuint MaxInstructions;  /**< size of prog->Instructions[] buffer */

   /* code-gen options */
   GLboolean EmitHighLevelInstructions;
   GLboolean EmitCondCodes;
   GLboolean EmitComments;
   GLboolean EmitBeginEndSub; /* XXX TEMPORARY */
} slang_emit_info;
71
72
73
74 static struct gl_program *
75 new_subroutine(slang_emit_info *emitInfo, GLuint *id)
76 {
77 GET_CURRENT_CONTEXT(ctx);
78 const GLuint n = emitInfo->NumSubroutines;
79
80 emitInfo->Subroutines = (struct gl_program **)
81 _mesa_realloc(emitInfo->Subroutines,
82 n * sizeof(struct gl_program),
83 (n + 1) * sizeof(struct gl_program));
84 emitInfo->Subroutines[n] = ctx->Driver.NewProgram(ctx, emitInfo->prog->Target, 0);
85 emitInfo->Subroutines[n]->Parameters = emitInfo->prog->Parameters;
86 emitInfo->NumSubroutines++;
87 *id = n;
88 return emitInfo->Subroutines[n];
89 }
90
91
92 /**
93 * Convert a writemask to a swizzle. Used for testing cond codes because
94 * we only want to test the cond code component(s) that was set by the
95 * previous instruction.
96 */
97 static GLuint
98 writemask_to_swizzle(GLuint writemask)
99 {
100 if (writemask == WRITEMASK_X)
101 return SWIZZLE_XXXX;
102 if (writemask == WRITEMASK_Y)
103 return SWIZZLE_YYYY;
104 if (writemask == WRITEMASK_Z)
105 return SWIZZLE_ZZZZ;
106 if (writemask == WRITEMASK_W)
107 return SWIZZLE_WWWW;
108 return SWIZZLE_XYZW; /* shouldn't be hit */
109 }
110
111
112 /**
113 * Convert a swizzle mask to a writemask.
114 * Note that the slang_ir_storage->Swizzle field can represent either a
115 * swizzle mask or a writemask, depending on how it's used. For example,
116 * when we parse "direction.yz" alone, we don't know whether .yz is a
117 * writemask or a swizzle. In this case, we encode ".yz" in store->Swizzle
118 * as a swizzle mask (.yz?? actually). Later, if direction.yz is used as
119 * an R-value, we use store->Swizzle as-is. Otherwise, if direction.yz is
120 * used as an L-value, we convert it to a writemask.
121 */
122 static GLuint
123 swizzle_to_writemask(GLuint swizzle)
124 {
125 GLuint i, writemask = 0x0;
126 for (i = 0; i < 4; i++) {
127 GLuint swz = GET_SWZ(swizzle, i);
128 if (swz <= SWIZZLE_W) {
129 writemask |= (1 << swz);
130 }
131 }
132 return writemask;
133 }
134
135
136 /**
137 * Swizzle a swizzle (function composition).
138 * That is, return swz2(swz1), or said another way: swz1.szw2
139 * Example: swizzle_swizzle(".zwxx", ".xxyw") yields ".zzwx"
140 */
141 GLuint
142 _slang_swizzle_swizzle(GLuint swz1, GLuint swz2)
143 {
144 GLuint i, swz, s[4];
145 for (i = 0; i < 4; i++) {
146 GLuint c = GET_SWZ(swz2, i);
147 if (c <= SWIZZLE_W)
148 s[i] = GET_SWZ(swz1, c);
149 else
150 s[i] = c;
151 }
152 swz = MAKE_SWIZZLE4(s[0], s[1], s[2], s[3]);
153 return swz;
154 }
155
156
157 /**
158 * Return the default swizzle mask for accessing a variable of the
159 * given size (in floats). If size = 1, comp is used to identify
160 * which component [0..3] of the register holds the variable.
161 */
162 GLuint
163 _slang_var_swizzle(GLint size, GLint comp)
164 {
165 switch (size) {
166 case 1:
167 return MAKE_SWIZZLE4(comp, comp, comp, comp);
168 case 2:
169 return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_NIL, SWIZZLE_NIL);
170 case 3:
171 return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_NIL);
172 default:
173 return SWIZZLE_XYZW;
174 }
175 }
176
177
178
179 /**
180 * Allocate storage for the given node (if it hasn't already been allocated).
181 *
182 * Typically this is temporary storage for an intermediate result (such as
183 * for a multiply or add, etc).
184 *
185 * If n->Store does not exist it will be created and will be of the size
186 * specified by defaultSize.
187 */
188 static GLboolean
189 alloc_node_storage(slang_emit_info *emitInfo, slang_ir_node *n,
190 GLint defaultSize)
191 {
192 assert(!n->Var);
193 if (!n->Store) {
194 assert(defaultSize > 0);
195 n->Store = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, defaultSize);
196 }
197
198 /* now allocate actual register(s). I.e. set n->Store->Index >= 0 */
199 if (n->Store->Index < 0) {
200 if (!_slang_alloc_temp(emitInfo->vt, n->Store)) {
201 slang_info_log_error(emitInfo->log,
202 "Ran out of registers, too many temporaries");
203 _slang_free(n->Store);
204 n->Store = NULL;
205 return GL_FALSE;
206 }
207 }
208 return GL_TRUE;
209 }
210
211
212 /**
213 * Free temporary storage, if n->Store is, in fact, temp storage.
214 * Otherwise, no-op.
215 */
216 static void
217 free_node_storage(slang_var_table *vt, slang_ir_node *n)
218 {
219 if (n->Store->File == PROGRAM_TEMPORARY &&
220 n->Store->Index >= 0 &&
221 n->Opcode != IR_SWIZZLE) {
222 if (_slang_is_temp(vt, n->Store)) {
223 _slang_free_temp(vt, n->Store);
224 n->Store->Index = -1;
225 n->Store = NULL; /* XXX this may not be needed */
226 }
227 }
228 }
229
230
231 /**
232 * Helper function to allocate a short-term temporary.
233 * Free it with _slang_free_temp().
234 */
235 static GLboolean
236 alloc_local_temp(slang_emit_info *emitInfo, slang_ir_storage *temp, GLint size)
237 {
238 assert(size >= 1);
239 assert(size <= 4);
240 _mesa_bzero(temp, sizeof(*temp));
241 temp->Size = size;
242 temp->File = PROGRAM_TEMPORARY;
243 temp->Index = -1;
244 return _slang_alloc_temp(emitInfo->vt, temp);
245 }
246
247
248 /**
249 * Remove any SWIZZLE_NIL terms from given swizzle mask.
250 * For a swizzle like .z??? generate .zzzz (replicate single component).
251 * Else, for .wx?? generate .wxzw (insert default component for the position).
252 */
253 static GLuint
254 fix_swizzle(GLuint swizzle)
255 {
256 GLuint c0 = GET_SWZ(swizzle, 0),
257 c1 = GET_SWZ(swizzle, 1),
258 c2 = GET_SWZ(swizzle, 2),
259 c3 = GET_SWZ(swizzle, 3);
260 if (c1 == SWIZZLE_NIL && c2 == SWIZZLE_NIL && c3 == SWIZZLE_NIL) {
261 /* smear first component across all positions */
262 c1 = c2 = c3 = c0;
263 }
264 else {
265 /* insert default swizzle components */
266 if (c0 == SWIZZLE_NIL)
267 c0 = SWIZZLE_X;
268 if (c1 == SWIZZLE_NIL)
269 c1 = SWIZZLE_Y;
270 if (c2 == SWIZZLE_NIL)
271 c2 = SWIZZLE_Z;
272 if (c3 == SWIZZLE_NIL)
273 c3 = SWIZZLE_W;
274 }
275 return MAKE_SWIZZLE4(c0, c1, c2, c3);
276 }
277
278
279
280 /**
281 * Convert IR storage to an instruction dst register.
282 */
283 static void
284 storage_to_dst_reg(struct prog_dst_register *dst, const slang_ir_storage *st)
285 {
286 const GLboolean relAddr = st->RelAddr;
287 const GLint size = st->Size;
288 GLint index = st->Index;
289 GLuint swizzle = st->Swizzle;
290
291 assert(index >= 0);
292 /* if this is storage relative to some parent storage, walk up the tree */
293 while (st->Parent) {
294 st = st->Parent;
295 assert(st->Index >= 0);
296 index += st->Index;
297 swizzle = _slang_swizzle_swizzle(st->Swizzle, swizzle);
298 }
299
300 assert(st->File != PROGRAM_UNDEFINED);
301 dst->File = st->File;
302
303 assert(index >= 0);
304 dst->Index = index;
305
306 assert(size >= 1);
307 assert(size <= 4);
308
309 if (swizzle != SWIZZLE_XYZW) {
310 dst->WriteMask = swizzle_to_writemask(swizzle);
311 }
312 else {
313 GLuint writemask;
314 switch (size) {
315 case 1:
316 writemask = WRITEMASK_X << GET_SWZ(st->Swizzle, 0);
317 break;
318 case 2:
319 writemask = WRITEMASK_XY;
320 break;
321 case 3:
322 writemask = WRITEMASK_XYZ;
323 break;
324 case 4:
325 writemask = WRITEMASK_XYZW;
326 break;
327 default:
328 ; /* error would have been caught above */
329 }
330 dst->WriteMask = writemask;
331 }
332
333 dst->RelAddr = relAddr;
334 }
335
336
337 /**
338 * Convert IR storage to an instruction src register.
339 */
340 static void
341 storage_to_src_reg(struct prog_src_register *src, const slang_ir_storage *st)
342 {
343 const GLboolean relAddr = st->RelAddr;
344 GLint index = st->Index;
345 GLuint swizzle = st->Swizzle;
346
347 /* if this is storage relative to some parent storage, walk up the tree */
348 assert(index >= 0);
349 while (st->Parent) {
350 st = st->Parent;
351 if (st->Index < 0) {
352 /* an error should have been reported already */
353 return;
354 }
355 assert(st->Index >= 0);
356 index += st->Index;
357 swizzle = _slang_swizzle_swizzle(fix_swizzle(st->Swizzle), swizzle);
358 }
359
360 assert(st->File >= 0);
361 #if 1 /* XXX temporary */
362 if (st->File == PROGRAM_UNDEFINED) {
363 slang_ir_storage *st0 = (slang_ir_storage *) st;
364 st0->File = PROGRAM_TEMPORARY;
365 }
366 #endif
367 assert(st->File < PROGRAM_UNDEFINED);
368 src->File = st->File;
369
370 assert(index >= 0);
371 src->Index = index;
372
373 swizzle = fix_swizzle(swizzle);
374 assert(GET_SWZ(swizzle, 0) <= SWIZZLE_W);
375 assert(GET_SWZ(swizzle, 1) <= SWIZZLE_W);
376 assert(GET_SWZ(swizzle, 2) <= SWIZZLE_W);
377 assert(GET_SWZ(swizzle, 3) <= SWIZZLE_W);
378 src->Swizzle = swizzle;
379
380 src->RelAddr = relAddr;
381 }
382
383
384 /*
385 * Setup storage pointing to a scalar constant/literal.
386 */
387 static void
388 constant_to_storage(slang_emit_info *emitInfo,
389 GLfloat val,
390 slang_ir_storage *store)
391 {
392 GLuint swizzle;
393 GLint reg;
394 GLfloat value[4];
395
396 value[0] = val;
397 reg = _mesa_add_unnamed_constant(emitInfo->prog->Parameters,
398 value, 1, &swizzle);
399
400 memset(store, 0, sizeof(*store));
401 store->File = PROGRAM_CONSTANT;
402 store->Index = reg;
403 store->Swizzle = swizzle;
404 }
405
406
407 /**
408 * Add new instruction at end of given program.
409 * \param prog the program to append instruction onto
410 * \param opcode opcode for the new instruction
411 * \return pointer to the new instruction
412 */
413 static struct prog_instruction *
414 new_instruction(slang_emit_info *emitInfo, gl_inst_opcode opcode)
415 {
416 struct gl_program *prog = emitInfo->prog;
417 struct prog_instruction *inst;
418
419 #if 0
420 /* print prev inst */
421 if (prog->NumInstructions > 0) {
422 _mesa_print_instruction(prog->Instructions + prog->NumInstructions - 1);
423 }
424 #endif
425 assert(prog->NumInstructions <= emitInfo->MaxInstructions);
426
427 if (prog->NumInstructions == emitInfo->MaxInstructions) {
428 /* grow the instruction buffer */
429 emitInfo->MaxInstructions += 20;
430 prog->Instructions =
431 _mesa_realloc_instructions(prog->Instructions,
432 prog->NumInstructions,
433 emitInfo->MaxInstructions);
434 }
435
436 inst = prog->Instructions + prog->NumInstructions;
437 prog->NumInstructions++;
438 _mesa_init_instructions(inst, 1);
439 inst->Opcode = opcode;
440 inst->BranchTarget = -1; /* invalid */
441 /*
442 printf("New inst %d: %p %s\n", prog->NumInstructions-1,(void*)inst,
443 _mesa_opcode_string(inst->Opcode));
444 */
445 return inst;
446 }
447
448
449 static struct prog_instruction *
450 emit_arl_load(slang_emit_info *emitInfo,
451 enum register_file file, GLint index, GLuint swizzle)
452 {
453 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_ARL);
454 inst->SrcReg[0].File = file;
455 inst->SrcReg[0].Index = index;
456 inst->SrcReg[0].Swizzle = swizzle;
457 inst->DstReg.File = PROGRAM_ADDRESS;
458 inst->DstReg.Index = 0;
459 inst->DstReg.WriteMask = WRITEMASK_X;
460 return inst;
461 }
462
463
464 /**
465 * Emit a new instruction with given opcode, operands.
466 * At this point the instruction may have multiple indirect register
467 * loads/stores. We convert those into ARL loads and address-relative
468 * operands. See comments inside.
469 * At some point in the future we could directly emit indirectly addressed
470 * registers in Mesa GPU instructions.
471 */
472 static struct prog_instruction *
473 emit_instruction(slang_emit_info *emitInfo,
474 gl_inst_opcode opcode,
475 const slang_ir_storage *dst,
476 const slang_ir_storage *src0,
477 const slang_ir_storage *src1,
478 const slang_ir_storage *src2)
479 {
480 struct prog_instruction *inst;
481 GLuint numIndirect = 0;
482 const slang_ir_storage *src[3];
483 slang_ir_storage newSrc[3], newDst;
484 GLuint i;
485 GLboolean isTemp[3];
486
487 isTemp[0] = isTemp[1] = isTemp[2] = GL_FALSE;
488
489 src[0] = src0;
490 src[1] = src1;
491 src[2] = src2;
492
493 /* count up how many operands are indirect loads */
494 for (i = 0; i < 3; i++) {
495 if (src[i] && src[i]->IsIndirect)
496 numIndirect++;
497 }
498 if (dst && dst->IsIndirect)
499 numIndirect++;
500
501 /* Take special steps for indirect register loads.
502 * If we had multiple address registers this would be simpler.
503 * For example, this GLSL code:
504 * x[i] = y[j] + z[k];
505 * would translate into something like:
506 * ARL ADDR.x, i;
507 * ARL ADDR.y, j;
508 * ARL ADDR.z, k;
509 * ADD TEMP[ADDR.x+5], TEMP[ADDR.y+9], TEMP[ADDR.z+4];
510 * But since we currently only have one address register we have to do this:
511 * ARL ADDR.x, i;
512 * MOV t1, TEMP[ADDR.x+9];
513 * ARL ADDR.x, j;
514 * MOV t2, TEMP[ADDR.x+4];
515 * ARL ADDR.x, k;
516 * ADD TEMP[ADDR.x+5], t1, t2;
517 * The code here figures this out...
518 */
519 if (numIndirect > 0) {
520 for (i = 0; i < 3; i++) {
521 if (src[i] && src[i]->IsIndirect) {
522 /* load the ARL register with the indirect register */
523 emit_arl_load(emitInfo,
524 src[i]->IndirectFile,
525 src[i]->IndirectIndex,
526 src[i]->IndirectSwizzle);
527
528 if (numIndirect > 1) {
529 /* Need to load src[i] into a temporary register */
530 slang_ir_storage srcRelAddr;
531 alloc_local_temp(emitInfo, &newSrc[i], src[i]->Size);
532 isTemp[i] = GL_TRUE;
533
534 /* set RelAddr flag on src register */
535 srcRelAddr = *src[i];
536 srcRelAddr.RelAddr = GL_TRUE;
537 srcRelAddr.IsIndirect = GL_FALSE; /* not really needed */
538
539 /* MOV newSrc, srcRelAddr; */
540 inst = emit_instruction(emitInfo,
541 OPCODE_MOV,
542 &newSrc[i],
543 &srcRelAddr,
544 NULL,
545 NULL);
546
547 src[i] = &newSrc[i];
548 }
549 else {
550 /* just rewrite the src[i] storage to be ARL-relative */
551 newSrc[i] = *src[i];
552 newSrc[i].RelAddr = GL_TRUE;
553 newSrc[i].IsIndirect = GL_FALSE; /* not really needed */
554 src[i] = &newSrc[i];
555 }
556 }
557 }
558 }
559
560 /* Take special steps for indirect dest register write */
561 if (dst && dst->IsIndirect) {
562 /* load the ARL register with the indirect register */
563 emit_arl_load(emitInfo,
564 dst->IndirectFile,
565 dst->IndirectIndex,
566 dst->IndirectSwizzle);
567 newDst = *dst;
568 newDst.RelAddr = GL_TRUE;
569 newDst.IsIndirect = GL_FALSE;
570 dst = &newDst;
571 }
572
573 /* OK, emit the instruction and its dst, src regs */
574 inst = new_instruction(emitInfo, opcode);
575 if (!inst)
576 return NULL;
577
578 if (dst)
579 storage_to_dst_reg(&inst->DstReg, dst);
580
581 for (i = 0; i < 3; i++) {
582 if (src[i])
583 storage_to_src_reg(&inst->SrcReg[i], src[i]);
584 }
585
586 /* Free any temp registers that we allocated above */
587 for (i = 0; i < 3; i++) {
588 if (isTemp[i])
589 _slang_free_temp(emitInfo->vt, &newSrc[i]);
590 }
591
592 return inst;
593 }
594
595
596
597 /**
598 * Put a comment on the given instruction.
599 */
600 static void
601 inst_comment(struct prog_instruction *inst, const char *comment)
602 {
603 if (inst)
604 inst->Comment = _mesa_strdup(comment);
605 }
606
607
608
609 /**
610 * Return pointer to last instruction in program.
611 */
612 static struct prog_instruction *
613 prev_instruction(slang_emit_info *emitInfo)
614 {
615 struct gl_program *prog = emitInfo->prog;
616 if (prog->NumInstructions == 0)
617 return NULL;
618 else
619 return prog->Instructions + prog->NumInstructions - 1;
620 }
621
622
623 static struct prog_instruction *
624 emit(slang_emit_info *emitInfo, slang_ir_node *n);
625
626
627 /**
628 * Return an annotation string for given node's storage.
629 */
630 static char *
631 storage_annotation(const slang_ir_node *n, const struct gl_program *prog)
632 {
633 #if ANNOTATE
634 const slang_ir_storage *st = n->Store;
635 static char s[100] = "";
636
637 if (!st)
638 return _mesa_strdup("");
639
640 switch (st->File) {
641 case PROGRAM_CONSTANT:
642 if (st->Index >= 0) {
643 const GLfloat *val = prog->Parameters->ParameterValues[st->Index];
644 if (st->Swizzle == SWIZZLE_NOOP)
645 sprintf(s, "{%g, %g, %g, %g}", val[0], val[1], val[2], val[3]);
646 else {
647 sprintf(s, "%g", val[GET_SWZ(st->Swizzle, 0)]);
648 }
649 }
650 break;
651 case PROGRAM_TEMPORARY:
652 if (n->Var)
653 sprintf(s, "%s", (char *) n->Var->a_name);
654 else
655 sprintf(s, "t[%d]", st->Index);
656 break;
657 case PROGRAM_STATE_VAR:
658 case PROGRAM_UNIFORM:
659 sprintf(s, "%s", prog->Parameters->Parameters[st->Index].Name);
660 break;
661 case PROGRAM_VARYING:
662 sprintf(s, "%s", prog->Varying->Parameters[st->Index].Name);
663 break;
664 case PROGRAM_INPUT:
665 sprintf(s, "input[%d]", st->Index);
666 break;
667 case PROGRAM_OUTPUT:
668 sprintf(s, "output[%d]", st->Index);
669 break;
670 default:
671 s[0] = 0;
672 }
673 return _mesa_strdup(s);
674 #else
675 return NULL;
676 #endif
677 }
678
679
680 /**
681 * Return an annotation string for an instruction.
682 */
683 static char *
684 instruction_annotation(gl_inst_opcode opcode, char *dstAnnot,
685 char *srcAnnot0, char *srcAnnot1, char *srcAnnot2)
686 {
687 #if ANNOTATE
688 const char *operator;
689 char *s;
690 int len = 50;
691
692 if (dstAnnot)
693 len += strlen(dstAnnot);
694 else
695 dstAnnot = _mesa_strdup("");
696
697 if (srcAnnot0)
698 len += strlen(srcAnnot0);
699 else
700 srcAnnot0 = _mesa_strdup("");
701
702 if (srcAnnot1)
703 len += strlen(srcAnnot1);
704 else
705 srcAnnot1 = _mesa_strdup("");
706
707 if (srcAnnot2)
708 len += strlen(srcAnnot2);
709 else
710 srcAnnot2 = _mesa_strdup("");
711
712 switch (opcode) {
713 case OPCODE_ADD:
714 operator = "+";
715 break;
716 case OPCODE_SUB:
717 operator = "-";
718 break;
719 case OPCODE_MUL:
720 operator = "*";
721 break;
722 case OPCODE_DP3:
723 operator = "DP3";
724 break;
725 case OPCODE_DP4:
726 operator = "DP4";
727 break;
728 case OPCODE_XPD:
729 operator = "XPD";
730 break;
731 case OPCODE_RSQ:
732 operator = "RSQ";
733 break;
734 case OPCODE_SGT:
735 operator = ">";
736 break;
737 default:
738 operator = ",";
739 }
740
741 s = (char *) malloc(len);
742 sprintf(s, "%s = %s %s %s %s", dstAnnot,
743 srcAnnot0, operator, srcAnnot1, srcAnnot2);
744 assert(_mesa_strlen(s) < len);
745
746 free(dstAnnot);
747 free(srcAnnot0);
748 free(srcAnnot1);
749 free(srcAnnot2);
750
751 return s;
752 #else
753 return NULL;
754 #endif
755 }
756
757
758 /**
759 * Emit an instruction that's just a comment.
760 */
761 static struct prog_instruction *
762 emit_comment(slang_emit_info *emitInfo, const char *comment)
763 {
764 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_NOP);
765 inst_comment(inst, comment);
766 return inst;
767 }
768
769
770 /**
771 * Generate code for a simple arithmetic instruction.
772 * Either 1, 2 or 3 operands.
773 */
774 static struct prog_instruction *
775 emit_arith(slang_emit_info *emitInfo, slang_ir_node *n)
776 {
777 const slang_ir_info *info = _slang_ir_info(n->Opcode);
778 struct prog_instruction *inst;
779 GLuint i;
780
781 assert(info);
782 assert(info->InstOpcode != OPCODE_NOP);
783
784 #if PEEPHOLE_OPTIMIZATIONS
785 /* Look for MAD opportunity */
786 if (info->NumParams == 2 &&
787 n->Opcode == IR_ADD && n->Children[0]->Opcode == IR_MUL) {
788 /* found pattern IR_ADD(IR_MUL(A, B), C) */
789 emit(emitInfo, n->Children[0]->Children[0]); /* A */
790 emit(emitInfo, n->Children[0]->Children[1]); /* B */
791 emit(emitInfo, n->Children[1]); /* C */
792 alloc_node_storage(emitInfo, n, -1); /* dest */
793
794 inst = emit_instruction(emitInfo,
795 OPCODE_MAD,
796 n->Store,
797 n->Children[0]->Children[0]->Store,
798 n->Children[0]->Children[1]->Store,
799 n->Children[1]->Store);
800
801 free_node_storage(emitInfo->vt, n->Children[0]->Children[0]);
802 free_node_storage(emitInfo->vt, n->Children[0]->Children[1]);
803 free_node_storage(emitInfo->vt, n->Children[1]);
804 return inst;
805 }
806
807 if (info->NumParams == 2 &&
808 n->Opcode == IR_ADD && n->Children[1]->Opcode == IR_MUL) {
809 /* found pattern IR_ADD(A, IR_MUL(B, C)) */
810 emit(emitInfo, n->Children[0]); /* A */
811 emit(emitInfo, n->Children[1]->Children[0]); /* B */
812 emit(emitInfo, n->Children[1]->Children[1]); /* C */
813 alloc_node_storage(emitInfo, n, -1); /* dest */
814
815 inst = emit_instruction(emitInfo,
816 OPCODE_MAD,
817 n->Store,
818 n->Children[1]->Children[0]->Store,
819 n->Children[1]->Children[1]->Store,
820 n->Children[0]->Store);
821
822 free_node_storage(emitInfo->vt, n->Children[1]->Children[0]);
823 free_node_storage(emitInfo->vt, n->Children[1]->Children[1]);
824 free_node_storage(emitInfo->vt, n->Children[0]);
825 return inst;
826 }
827 #endif
828
829 /* gen code for children, may involve temp allocation */
830 for (i = 0; i < info->NumParams; i++) {
831 emit(emitInfo, n->Children[i]);
832 if (!n->Children[i] || !n->Children[i]->Store) {
833 /* error recovery */
834 return NULL;
835 }
836 }
837
838 /* result storage */
839 alloc_node_storage(emitInfo, n, -1);
840
841 inst = emit_instruction(emitInfo,
842 info->InstOpcode,
843 n->Store, /* dest */
844 (info->NumParams > 0 ? n->Children[0]->Store : NULL),
845 (info->NumParams > 1 ? n->Children[1]->Store : NULL),
846 (info->NumParams > 2 ? n->Children[2]->Store : NULL)
847 );
848
849 /* free temps */
850 for (i = 0; i < info->NumParams; i++)
851 free_node_storage(emitInfo->vt, n->Children[i]);
852
853 return inst;
854 }
855
856
857 /**
858 * Emit code for == and != operators. These could normally be handled
859 * by emit_arith() except we need to be able to handle structure comparisons.
860 */
861 static struct prog_instruction *
862 emit_compare(slang_emit_info *emitInfo, slang_ir_node *n)
863 {
864 struct prog_instruction *inst = NULL;
865 GLint size;
866
867 assert(n->Opcode == IR_EQUAL || n->Opcode == IR_NOTEQUAL);
868
869 /* gen code for children */
870 emit(emitInfo, n->Children[0]);
871 emit(emitInfo, n->Children[1]);
872
873 if (n->Children[0]->Store->Size != n->Children[1]->Store->Size) {
874 slang_info_log_error(emitInfo->log, "invalid operands to == or !=");
875 return NULL;
876 }
877
878 /* final result is 1 bool */
879 if (!alloc_node_storage(emitInfo, n, 1))
880 return NULL;
881
882 size = n->Children[0]->Store->Size;
883
884 if (size == 1) {
885 gl_inst_opcode opcode = n->Opcode == IR_EQUAL ? OPCODE_SEQ : OPCODE_SNE;
886 inst = emit_instruction(emitInfo,
887 opcode,
888 n->Store, /* dest */
889 n->Children[0]->Store,
890 n->Children[1]->Store,
891 NULL);
892 }
893 else if (size <= 4) {
894 /* compare two vectors.
895 * Unfortunately, there's no instruction to compare vectors and
896 * return a scalar result. Do it with some compare and dot product
897 * instructions...
898 */
899 GLuint swizzle;
900 gl_inst_opcode dotOp;
901 slang_ir_storage tempStore;
902
903 if (!alloc_local_temp(emitInfo, &tempStore, 4)) {
904 return NULL;
905 /* out of temps */
906 }
907
908 if (size == 4) {
909 dotOp = OPCODE_DP4;
910 swizzle = SWIZZLE_XYZW;
911 }
912 else if (size == 3) {
913 dotOp = OPCODE_DP3;
914 swizzle = SWIZZLE_XYZW;
915 }
916 else {
917 assert(size == 2);
918 dotOp = OPCODE_DP3;
919 swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y);
920 }
921
922 /* Compute inequality (temp = (A != B)) */
923 inst = emit_instruction(emitInfo,
924 OPCODE_SNE,
925 &tempStore,
926 n->Children[0]->Store,
927 n->Children[1]->Store,
928 NULL);
929 inst_comment(inst, "Compare values");
930
931 /* Compute val = DOT(temp, temp) (reduction) */
932 inst = emit_instruction(emitInfo,
933 dotOp,
934 n->Store,
935 &tempStore,
936 &tempStore,
937 NULL);
938 inst->SrcReg[0].Swizzle = inst->SrcReg[1].Swizzle = swizzle; /*override*/
939 inst_comment(inst, "Reduce vec to bool");
940
941 _slang_free_temp(emitInfo->vt, &tempStore); /* free temp */
942
943 if (n->Opcode == IR_EQUAL) {
944 /* compute val = !val.x with SEQ val, val, 0; */
945 slang_ir_storage zero;
946 constant_to_storage(emitInfo, 0.0, &zero);
947 inst = emit_instruction(emitInfo,
948 OPCODE_SEQ,
949 n->Store, /* dest */
950 n->Store,
951 &zero,
952 NULL);
953 inst_comment(inst, "Invert true/false");
954 }
955 }
956 else {
957 /* size > 4, struct or array compare.
958 * XXX this won't work reliably for structs with padding!!
959 */
960 GLint i, num = (n->Children[0]->Store->Size + 3) / 4;
961 slang_ir_storage accTemp, sneTemp;
962
963 if (!alloc_local_temp(emitInfo, &accTemp, 4))
964 return NULL;
965
966 if (!alloc_local_temp(emitInfo, &sneTemp, 4))
967 return NULL;
968
969 for (i = 0; i < num; i++) {
970 slang_ir_storage srcStore0 = *n->Children[0]->Store;
971 slang_ir_storage srcStore1 = *n->Children[1]->Store;
972 srcStore0.Index += i;
973 srcStore1.Index += i;
974
975 if (i == 0) {
976 /* SNE accTemp, left[i], right[i] */
977 inst = emit_instruction(emitInfo, OPCODE_SNE,
978 &accTemp, /* dest */
979 &srcStore0,
980 &srcStore1,
981 NULL);
982 inst_comment(inst, "Begin struct/array comparison");
983 }
984 else {
985 /* SNE sneTemp, left[i], right[i] */
986 inst = emit_instruction(emitInfo, OPCODE_SNE,
987 &sneTemp, /* dest */
988 &srcStore0,
989 &srcStore1,
990 NULL);
991 /* ADD accTemp, accTemp, sneTemp; # like logical-OR */
992 inst = emit_instruction(emitInfo, OPCODE_ADD,
993 &accTemp, /* dest */
994 &accTemp,
995 &sneTemp,
996 NULL);
997 }
998 }
999
1000 /* compute accTemp.x || accTemp.y || accTemp.z || accTemp.w with DOT4 */
1001 inst = emit_instruction(emitInfo, OPCODE_DP4,
1002 n->Store,
1003 &accTemp,
1004 &accTemp,
1005 NULL);
1006 inst_comment(inst, "End struct/array comparison");
1007
1008 if (n->Opcode == IR_EQUAL) {
1009 /* compute tmp.x = !tmp.x via tmp.x = (tmp.x == 0) */
1010 slang_ir_storage zero;
1011 constant_to_storage(emitInfo, 0.0, &zero);
1012 inst = emit_instruction(emitInfo, OPCODE_SEQ,
1013 n->Store, /* dest */
1014 n->Store,
1015 &zero,
1016 NULL);
1017 inst_comment(inst, "Invert true/false");
1018 }
1019
1020 _slang_free_temp(emitInfo->vt, &accTemp);
1021 _slang_free_temp(emitInfo->vt, &sneTemp);
1022 }
1023
1024 /* free temps */
1025 free_node_storage(emitInfo->vt, n->Children[0]);
1026 free_node_storage(emitInfo->vt, n->Children[1]);
1027
1028 return inst;
1029 }
1030
1031
1032
1033 /**
1034 * Generate code for an IR_CLAMP instruction.
1035 */
1036 static struct prog_instruction *
1037 emit_clamp(slang_emit_info *emitInfo, slang_ir_node *n)
1038 {
1039 struct prog_instruction *inst;
1040 slang_ir_node tmpNode;
1041
1042 assert(n->Opcode == IR_CLAMP);
1043 /* ch[0] = value
1044 * ch[1] = min limit
1045 * ch[2] = max limit
1046 */
1047
1048 inst = emit(emitInfo, n->Children[0]);
1049
1050 /* If lower limit == 0.0 and upper limit == 1.0,
1051 * set prev instruction's SaturateMode field to SATURATE_ZERO_ONE.
1052 * Else,
1053 * emit OPCODE_MIN, OPCODE_MAX sequence.
1054 */
1055 #if 0
1056 /* XXX this isn't quite finished yet */
1057 if (n->Children[1]->Opcode == IR_FLOAT &&
1058 n->Children[1]->Value[0] == 0.0 &&
1059 n->Children[1]->Value[1] == 0.0 &&
1060 n->Children[1]->Value[2] == 0.0 &&
1061 n->Children[1]->Value[3] == 0.0 &&
1062 n->Children[2]->Opcode == IR_FLOAT &&
1063 n->Children[2]->Value[0] == 1.0 &&
1064 n->Children[2]->Value[1] == 1.0 &&
1065 n->Children[2]->Value[2] == 1.0 &&
1066 n->Children[2]->Value[3] == 1.0) {
1067 if (!inst) {
1068 inst = prev_instruction(prog);
1069 }
1070 if (inst && inst->Opcode != OPCODE_NOP) {
1071 /* and prev instruction's DstReg matches n->Children[0]->Store */
1072 inst->SaturateMode = SATURATE_ZERO_ONE;
1073 n->Store = n->Children[0]->Store;
1074 return inst;
1075 }
1076 }
1077 #endif
1078
1079 if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size))
1080 return NULL;
1081
1082 emit(emitInfo, n->Children[1]);
1083 emit(emitInfo, n->Children[2]);
1084
1085 /* Some GPUs don't allow reading from output registers. So if the
1086 * dest for this clamp() is an output reg, we can't use that reg for
1087 * the intermediate result. Use a temp register instead.
1088 */
1089 _mesa_bzero(&tmpNode, sizeof(tmpNode));
1090 alloc_node_storage(emitInfo, &tmpNode, n->Store->Size);
1091
1092 /* tmp = max(ch[0], ch[1]) */
1093 inst = emit_instruction(emitInfo, OPCODE_MAX,
1094 tmpNode.Store, /* dest */
1095 n->Children[0]->Store,
1096 n->Children[1]->Store,
1097 NULL);
1098
1099 /* n->dest = min(tmp, ch[2]) */
1100 inst = emit_instruction(emitInfo, OPCODE_MIN,
1101 n->Store, /* dest */
1102 tmpNode.Store,
1103 n->Children[2]->Store,
1104 NULL);
1105
1106 free_node_storage(emitInfo->vt, &tmpNode);
1107
1108 return inst;
1109 }
1110
1111
1112 static struct prog_instruction *
1113 emit_negation(slang_emit_info *emitInfo, slang_ir_node *n)
1114 {
1115 /* Implement as MOV dst, -src; */
1116 /* XXX we could look at the previous instruction and in some circumstances
1117 * modify it to accomplish the negation.
1118 */
1119 struct prog_instruction *inst;
1120
1121 emit(emitInfo, n->Children[0]);
1122
1123 if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size))
1124 return NULL;
1125
1126 inst = emit_instruction(emitInfo,
1127 OPCODE_MOV,
1128 n->Store, /* dest */
1129 n->Children[0]->Store,
1130 NULL,
1131 NULL);
1132 inst->SrcReg[0].NegateBase = NEGATE_XYZW;
1133 return inst;
1134 }
1135
1136
1137 static struct prog_instruction *
1138 emit_label(slang_emit_info *emitInfo, const slang_ir_node *n)
1139 {
1140 assert(n->Label);
1141 #if 0
1142 /* XXX this fails in loop tail code - investigate someday */
1143 assert(_slang_label_get_location(n->Label) < 0);
1144 _slang_label_set_location(n->Label, emitInfo->prog->NumInstructions,
1145 emitInfo->prog);
1146 #else
1147 if (_slang_label_get_location(n->Label) < 0)
1148 _slang_label_set_location(n->Label, emitInfo->prog->NumInstructions,
1149 emitInfo->prog);
1150 #endif
1151 return NULL;
1152 }
1153
1154
1155 /**
1156 * Emit code for a function call.
1157 * Note that for each time a function is called, we emit the function's
1158 * body code again because the set of available registers may be different.
1159 */
1160 static struct prog_instruction *
1161 emit_fcall(slang_emit_info *emitInfo, slang_ir_node *n)
1162 {
1163 struct gl_program *progSave;
1164 struct prog_instruction *inst;
1165 GLuint subroutineId;
1166 GLuint maxInstSave;
1167
1168 assert(n->Opcode == IR_CALL);
1169 assert(n->Label);
1170
1171 /* save/push cur program */
1172 maxInstSave = emitInfo->MaxInstructions;
1173 progSave = emitInfo->prog;
1174
1175 emitInfo->prog = new_subroutine(emitInfo, &subroutineId);
1176 emitInfo->MaxInstructions = emitInfo->prog->NumInstructions;
1177
1178 _slang_label_set_location(n->Label, emitInfo->prog->NumInstructions,
1179 emitInfo->prog);
1180
1181 if (emitInfo->EmitBeginEndSub) {
1182 /* BGNSUB isn't a real instruction.
1183 * We require a label (i.e. "foobar:") though, if we're going to
1184 * print the program in the NV format. The BNGSUB instruction is
1185 * really just a NOP to attach the label to.
1186 */
1187 inst = new_instruction(emitInfo, OPCODE_BGNSUB);
1188 inst_comment(inst, n->Label->Name);
1189 }
1190
1191 /* body of function: */
1192 emit(emitInfo, n->Children[0]);
1193 n->Store = n->Children[0]->Store;
1194
1195 /* add RET instruction now, if needed */
1196 inst = prev_instruction(emitInfo);
1197 if (inst && inst->Opcode != OPCODE_RET) {
1198 inst = new_instruction(emitInfo, OPCODE_RET);
1199 }
1200
1201 if (emitInfo->EmitBeginEndSub) {
1202 inst = new_instruction(emitInfo, OPCODE_ENDSUB);
1203 inst_comment(inst, n->Label->Name);
1204 }
1205
1206 /* pop/restore cur program */
1207 emitInfo->prog = progSave;
1208 emitInfo->MaxInstructions = maxInstSave;
1209
1210 /* emit the function call */
1211 inst = new_instruction(emitInfo, OPCODE_CAL);
1212 /* The branch target is just the subroutine number (changed later) */
1213 inst->BranchTarget = subroutineId;
1214 inst_comment(inst, n->Label->Name);
1215 assert(inst->BranchTarget >= 0);
1216
1217 return inst;
1218 }
1219
1220
1221 /**
1222 * Emit code for a 'return' statement.
1223 */
1224 static struct prog_instruction *
1225 emit_return(slang_emit_info *emitInfo, slang_ir_node *n)
1226 {
1227 struct prog_instruction *inst;
1228 assert(n);
1229 assert(n->Opcode == IR_RETURN);
1230 assert(n->Label);
1231 inst = new_instruction(emitInfo, OPCODE_RET);
1232 inst->DstReg.CondMask = COND_TR; /* always return */
1233 return inst;
1234 }
1235
1236
1237 static struct prog_instruction *
1238 emit_kill(slang_emit_info *emitInfo)
1239 {
1240 struct gl_fragment_program *fp;
1241 struct prog_instruction *inst;
1242 /* NV-KILL - discard fragment depending on condition code.
1243 * Note that ARB-KILL depends on sign of vector operand.
1244 */
1245 inst = new_instruction(emitInfo, OPCODE_KIL_NV);
1246 inst->DstReg.CondMask = COND_TR; /* always kill */
1247
1248 assert(emitInfo->prog->Target == GL_FRAGMENT_PROGRAM_ARB);
1249 fp = (struct gl_fragment_program *) emitInfo->prog;
1250 fp->UsesKill = GL_TRUE;
1251
1252 return inst;
1253 }
1254
1255
1256 static struct prog_instruction *
1257 emit_tex(slang_emit_info *emitInfo, slang_ir_node *n)
1258 {
1259 struct prog_instruction *inst;
1260 gl_inst_opcode opcode;
1261
1262 if (n->Opcode == IR_TEX) {
1263 opcode = OPCODE_TEX;
1264 }
1265 else if (n->Opcode == IR_TEXB) {
1266 opcode = OPCODE_TXB;
1267 }
1268 else {
1269 assert(n->Opcode == IR_TEXP);
1270 opcode = OPCODE_TXP;
1271 }
1272
1273 /* emit code for the texcoord operand */
1274 (void) emit(emitInfo, n->Children[1]);
1275
1276 /* alloc storage for result of texture fetch */
1277 if (!alloc_node_storage(emitInfo, n, 4))
1278 return NULL;
1279
1280 /* emit TEX instruction; Child[1] is the texcoord */
1281 inst = emit_instruction(emitInfo,
1282 opcode,
1283 n->Store,
1284 n->Children[1]->Store,
1285 NULL,
1286 NULL);
1287
1288 /* Child[0] is the sampler (a uniform which'll indicate the texture unit) */
1289 assert(n->Children[0]->Store);
1290 assert(n->Children[0]->Store->File == PROGRAM_SAMPLER);
1291 /* Store->Index is the sampler index */
1292 assert(n->Children[0]->Store->Index >= 0);
1293 /* Store->Size is the texture target */
1294 assert(n->Children[0]->Store->Size >= TEXTURE_1D_INDEX);
1295 assert(n->Children[0]->Store->Size <= TEXTURE_RECT_INDEX);
1296
1297 inst->TexSrcTarget = n->Children[0]->Store->Size;
1298 inst->TexSrcUnit = n->Children[0]->Store->Index; /* i.e. uniform's index */
1299
1300 /* mark the sampler as being used */
1301 _mesa_use_uniform(emitInfo->prog->Parameters,
1302 (char *) n->Children[0]->Var->a_name);
1303
1304 return inst;
1305 }
1306
1307
1308 /**
1309 * Assignment/copy
1310 */
1311 static struct prog_instruction *
1312 emit_copy(slang_emit_info *emitInfo, slang_ir_node *n)
1313 {
1314 struct prog_instruction *inst;
1315
1316 assert(n->Opcode == IR_COPY);
1317
1318 /* lhs */
1319 emit(emitInfo, n->Children[0]);
1320 if (!n->Children[0]->Store || n->Children[0]->Store->Index < 0) {
1321 /* an error should have been already recorded */
1322 return NULL;
1323 }
1324
1325 /* rhs */
1326 assert(n->Children[1]);
1327 inst = emit(emitInfo, n->Children[1]);
1328
1329 if (!n->Children[1]->Store || n->Children[1]->Store->Index < 0) {
1330 if (!emitInfo->log->text) {
1331 slang_info_log_error(emitInfo->log, "invalid assignment");
1332 }
1333 return NULL;
1334 }
1335
1336 assert(n->Children[1]->Store->Index >= 0);
1337
1338 /*assert(n->Children[0]->Store->Size == n->Children[1]->Store->Size);*/
1339
1340 n->Store = n->Children[0]->Store;
1341
1342 if (n->Store->File == PROGRAM_SAMPLER) {
1343 /* no code generated for sampler assignments,
1344 * just copy the sampler index at compile time.
1345 */
1346 n->Store->Index = n->Children[1]->Store->Index;
1347 return NULL;
1348 }
1349
1350 #if PEEPHOLE_OPTIMIZATIONS
1351 if (inst &&
1352 _slang_is_temp(emitInfo->vt, n->Children[1]->Store) &&
1353 (inst->DstReg.File == n->Children[1]->Store->File) &&
1354 (inst->DstReg.Index == n->Children[1]->Store->Index) &&
1355 !n->Children[0]->Store->IsIndirect &&
1356 n->Children[0]->Store->Size <= 4) {
1357 /* Peephole optimization:
1358 * The Right-Hand-Side has its results in a temporary place.
1359 * Modify the RHS (and the prev instruction) to store its results
1360 * in the destination specified by n->Children[0].
1361 * Then, this MOVE is a no-op.
1362 * Ex:
1363 * MUL tmp, x, y;
1364 * MOV a, tmp;
1365 * becomes:
1366 * MUL a, x, y;
1367 */
1368 if (n->Children[1]->Opcode != IR_SWIZZLE)
1369 _slang_free_temp(emitInfo->vt, n->Children[1]->Store);
1370 *n->Children[1]->Store = *n->Children[0]->Store;
1371
1372 /* fixup the previous instruction (which stored the RHS result) */
1373 assert(n->Children[0]->Store->Index >= 0);
1374
1375 storage_to_dst_reg(&inst->DstReg, n->Children[0]->Store);
1376 return inst;
1377 }
1378 else
1379 #endif
1380 {
1381 if (n->Children[0]->Store->Size > 4) {
1382 /* move matrix/struct etc (block of registers) */
1383 slang_ir_storage dstStore = *n->Children[0]->Store;
1384 slang_ir_storage srcStore = *n->Children[1]->Store;
1385 GLint size = srcStore.Size;
1386 ASSERT(n->Children[1]->Store->Swizzle == SWIZZLE_NOOP);
1387 dstStore.Size = 4;
1388 srcStore.Size = 4;
1389 while (size >= 4) {
1390 inst = emit_instruction(emitInfo, OPCODE_MOV,
1391 &dstStore,
1392 &srcStore,
1393 NULL,
1394 NULL);
1395 inst_comment(inst, "IR_COPY block");
1396 srcStore.Index++;
1397 dstStore.Index++;
1398 size -= 4;
1399 }
1400 }
1401 else {
1402 /* single register move */
1403 char *srcAnnot, *dstAnnot;
1404 assert(n->Children[0]->Store->Index >= 0);
1405 inst = emit_instruction(emitInfo, OPCODE_MOV,
1406 n->Children[0]->Store, /* dest */
1407 n->Children[1]->Store,
1408 NULL,
1409 NULL);
1410 dstAnnot = storage_annotation(n->Children[0], emitInfo->prog);
1411 srcAnnot = storage_annotation(n->Children[1], emitInfo->prog);
1412 inst->Comment = instruction_annotation(inst->Opcode, dstAnnot,
1413 srcAnnot, NULL, NULL);
1414 }
1415 free_node_storage(emitInfo->vt, n->Children[1]);
1416 return inst;
1417 }
1418 }
1419
1420
1421 /**
1422 * An IR_COND node wraps a boolean expression which is used by an
1423 * IF or WHILE test. This is where we'll set condition codes, if needed.
1424 */
1425 static struct prog_instruction *
1426 emit_cond(slang_emit_info *emitInfo, slang_ir_node *n)
1427 {
1428 struct prog_instruction *inst;
1429
1430 assert(n->Opcode == IR_COND);
1431
1432 if (!n->Children[0])
1433 return NULL;
1434
1435 /* emit code for the expression */
1436 inst = emit(emitInfo, n->Children[0]);
1437
1438 if (!n->Children[0]->Store) {
1439 /* error recovery */
1440 return NULL;
1441 }
1442
1443 assert(n->Children[0]->Store);
1444 /*assert(n->Children[0]->Store->Size == 1);*/
1445
1446 if (emitInfo->EmitCondCodes) {
1447 if (inst &&
1448 n->Children[0]->Store &&
1449 inst->DstReg.File == n->Children[0]->Store->File &&
1450 inst->DstReg.Index == n->Children[0]->Store->Index) {
1451 /* The previous instruction wrote to the register who's value
1452 * we're testing. Just fix that instruction so that the
1453 * condition codes are computed.
1454 */
1455 inst->CondUpdate = GL_TRUE;
1456 n->Store = n->Children[0]->Store;
1457 return inst;
1458 }
1459 else {
1460 /* This'll happen for things like "if (i) ..." where no code
1461 * is normally generated for the expression "i".
1462 * Generate a move instruction just to set condition codes.
1463 */
1464 if (!alloc_node_storage(emitInfo, n, 1))
1465 return NULL;
1466 inst = emit_instruction(emitInfo, OPCODE_MOV,
1467 n->Store, /* dest */
1468 n->Children[0]->Store,
1469 NULL,
1470 NULL);
1471 inst->CondUpdate = GL_TRUE;
1472 inst_comment(inst, "COND expr");
1473 _slang_free_temp(emitInfo->vt, n->Store);
1474 return inst;
1475 }
1476 }
1477 else {
1478 /* No-op: the boolean result of the expression is in a regular reg */
1479 n->Store = n->Children[0]->Store;
1480 return inst;
1481 }
1482 }
1483
1484
1485 /**
1486 * Logical-NOT
1487 */
1488 static struct prog_instruction *
1489 emit_not(slang_emit_info *emitInfo, slang_ir_node *n)
1490 {
1491 static const struct {
1492 gl_inst_opcode op, opNot;
1493 } operators[] = {
1494 { OPCODE_SLT, OPCODE_SGE },
1495 { OPCODE_SLE, OPCODE_SGT },
1496 { OPCODE_SGT, OPCODE_SLE },
1497 { OPCODE_SGE, OPCODE_SLT },
1498 { OPCODE_SEQ, OPCODE_SNE },
1499 { OPCODE_SNE, OPCODE_SEQ },
1500 { 0, 0 }
1501 };
1502 struct prog_instruction *inst;
1503 slang_ir_storage zero;
1504 GLuint i;
1505
1506 /* child expr */
1507 inst = emit(emitInfo, n->Children[0]);
1508
1509 #if PEEPHOLE_OPTIMIZATIONS
1510 if (inst) {
1511 /* if the prev instruction was a comparison instruction, invert it */
1512 for (i = 0; operators[i].op; i++) {
1513 if (inst->Opcode == operators[i].op) {
1514 inst->Opcode = operators[i].opNot;
1515 n->Store = n->Children[0]->Store;
1516 return inst;
1517 }
1518 }
1519 }
1520 #endif
1521
1522 /* else, invert using SEQ (v = v == 0) */
1523 if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size))
1524 return NULL;
1525
1526 constant_to_storage(emitInfo, 0.0, &zero);
1527 inst = emit_instruction(emitInfo,
1528 OPCODE_SEQ,
1529 n->Store,
1530 n->Children[0]->Store,
1531 &zero,
1532 NULL);
1533 inst_comment(inst, "NOT");
1534
1535 free_node_storage(emitInfo->vt, n->Children[0]);
1536
1537 return inst;
1538 }
1539
1540
1541 static struct prog_instruction *
1542 emit_if(slang_emit_info *emitInfo, slang_ir_node *n)
1543 {
1544 struct gl_program *prog = emitInfo->prog;
1545 GLuint ifInstLoc, elseInstLoc = 0;
1546 GLuint condWritemask = 0;
1547
1548 /* emit condition expression code */
1549 {
1550 struct prog_instruction *inst;
1551 inst = emit(emitInfo, n->Children[0]);
1552 if (emitInfo->EmitCondCodes) {
1553 if (!inst) {
1554 /* error recovery */
1555 return NULL;
1556 }
1557 condWritemask = inst->DstReg.WriteMask;
1558 }
1559 }
1560
1561 if (!n->Children[0]->Store)
1562 return NULL;
1563
1564 #if 0
1565 assert(n->Children[0]->Store->Size == 1); /* a bool! */
1566 #endif
1567
1568 ifInstLoc = prog->NumInstructions;
1569 if (emitInfo->EmitHighLevelInstructions) {
1570 if (emitInfo->EmitCondCodes) {
1571 /* IF condcode THEN ... */
1572 struct prog_instruction *ifInst;
1573 ifInst = new_instruction(emitInfo, OPCODE_IF);
1574 ifInst->DstReg.CondMask = COND_NE; /* if cond is non-zero */
1575 /* only test the cond code (1 of 4) that was updated by the
1576 * previous instruction.
1577 */
1578 ifInst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1579 }
1580 else {
1581 /* IF src[0] THEN ... */
1582 emit_instruction(emitInfo, OPCODE_IF,
1583 NULL, /* dst */
1584 n->Children[0]->Store, /* op0 */
1585 NULL,
1586 NULL);
1587 }
1588 }
1589 else {
1590 /* conditional jump to else, or endif */
1591 struct prog_instruction *ifInst = new_instruction(emitInfo, OPCODE_BRA);
1592 ifInst->DstReg.CondMask = COND_EQ; /* BRA if cond is zero */
1593 inst_comment(ifInst, "if zero");
1594 ifInst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1595 }
1596
1597 /* if body */
1598 emit(emitInfo, n->Children[1]);
1599
1600 if (n->Children[2]) {
1601 /* have else body */
1602 elseInstLoc = prog->NumInstructions;
1603 if (emitInfo->EmitHighLevelInstructions) {
1604 (void) new_instruction(emitInfo, OPCODE_ELSE);
1605 }
1606 else {
1607 /* jump to endif instruction */
1608 struct prog_instruction *inst;
1609 inst = new_instruction(emitInfo, OPCODE_BRA);
1610 inst_comment(inst, "else");
1611 inst->DstReg.CondMask = COND_TR; /* always branch */
1612 }
1613 prog->Instructions[ifInstLoc].BranchTarget = prog->NumInstructions;
1614 emit(emitInfo, n->Children[2]);
1615 }
1616 else {
1617 /* no else body */
1618 prog->Instructions[ifInstLoc].BranchTarget = prog->NumInstructions;
1619 }
1620
1621 if (emitInfo->EmitHighLevelInstructions) {
1622 (void) new_instruction(emitInfo, OPCODE_ENDIF);
1623 }
1624
1625 if (n->Children[2]) {
1626 prog->Instructions[elseInstLoc].BranchTarget = prog->NumInstructions;
1627 }
1628 return NULL;
1629 }
1630
1631
1632 static struct prog_instruction *
1633 emit_loop(slang_emit_info *emitInfo, slang_ir_node *n)
1634 {
1635 struct gl_program *prog = emitInfo->prog;
1636 struct prog_instruction *endInst;
1637 GLuint beginInstLoc, tailInstLoc, endInstLoc;
1638 slang_ir_node *ir;
1639
1640 /* emit OPCODE_BGNLOOP */
1641 beginInstLoc = prog->NumInstructions;
1642 if (emitInfo->EmitHighLevelInstructions) {
1643 (void) new_instruction(emitInfo, OPCODE_BGNLOOP);
1644 }
1645
1646 /* body */
1647 emit(emitInfo, n->Children[0]);
1648
1649 /* tail */
1650 tailInstLoc = prog->NumInstructions;
1651 if (n->Children[1]) {
1652 if (emitInfo->EmitComments)
1653 emit_comment(emitInfo, "Loop tail code:");
1654 emit(emitInfo, n->Children[1]);
1655 }
1656
1657 endInstLoc = prog->NumInstructions;
1658 if (emitInfo->EmitHighLevelInstructions) {
1659 /* emit OPCODE_ENDLOOP */
1660 endInst = new_instruction(emitInfo, OPCODE_ENDLOOP);
1661 }
1662 else {
1663 /* emit unconditional BRA-nch */
1664 endInst = new_instruction(emitInfo, OPCODE_BRA);
1665 endInst->DstReg.CondMask = COND_TR; /* always true */
1666 }
1667 /* ENDLOOP's BranchTarget points to the BGNLOOP inst */
1668 endInst->BranchTarget = beginInstLoc;
1669
1670 if (emitInfo->EmitHighLevelInstructions) {
1671 /* BGNLOOP's BranchTarget points to the ENDLOOP inst */
1672 prog->Instructions[beginInstLoc].BranchTarget = prog->NumInstructions -1;
1673 }
1674
1675 /* Done emitting loop code. Now walk over the loop's linked list of
1676 * BREAK and CONT nodes, filling in their BranchTarget fields (which
1677 * will point to the ENDLOOP+1 or BGNLOOP instructions, respectively).
1678 */
1679 for (ir = n->List; ir; ir = ir->List) {
1680 struct prog_instruction *inst = prog->Instructions + ir->InstLocation;
1681 assert(inst->BranchTarget < 0);
1682 if (ir->Opcode == IR_BREAK ||
1683 ir->Opcode == IR_BREAK_IF_TRUE) {
1684 assert(inst->Opcode == OPCODE_BRK ||
1685 inst->Opcode == OPCODE_BRA);
1686 /* go to instruction after end of loop */
1687 inst->BranchTarget = endInstLoc + 1;
1688 }
1689 else {
1690 assert(ir->Opcode == IR_CONT ||
1691 ir->Opcode == IR_CONT_IF_TRUE);
1692 assert(inst->Opcode == OPCODE_CONT ||
1693 inst->Opcode == OPCODE_BRA);
1694 /* go to instruction at tail of loop */
1695 inst->BranchTarget = endInstLoc;
1696 }
1697 }
1698 return NULL;
1699 }
1700
1701
1702 /**
1703 * Unconditional "continue" or "break" statement.
1704 * Either OPCODE_CONT, OPCODE_BRK or OPCODE_BRA will be emitted.
1705 */
1706 static struct prog_instruction *
1707 emit_cont_break(slang_emit_info *emitInfo, slang_ir_node *n)
1708 {
1709 gl_inst_opcode opcode;
1710 struct prog_instruction *inst;
1711
1712 if (n->Opcode == IR_CONT) {
1713 /* we need to execute the loop's tail code before doing CONT */
1714 assert(n->Parent);
1715 assert(n->Parent->Opcode == IR_LOOP);
1716 if (n->Parent->Children[1]) {
1717 /* emit tail code */
1718 if (emitInfo->EmitComments) {
1719 emit_comment(emitInfo, "continue - tail code:");
1720 }
1721 emit(emitInfo, n->Parent->Children[1]);
1722 }
1723 }
1724
1725 /* opcode selection */
1726 if (emitInfo->EmitHighLevelInstructions) {
1727 opcode = (n->Opcode == IR_CONT) ? OPCODE_CONT : OPCODE_BRK;
1728 }
1729 else {
1730 opcode = OPCODE_BRA;
1731 }
1732 n->InstLocation = emitInfo->prog->NumInstructions;
1733 inst = new_instruction(emitInfo, opcode);
1734 inst->DstReg.CondMask = COND_TR; /* always true */
1735 return inst;
1736 }
1737
1738
1739 /**
1740 * Conditional "continue" or "break" statement.
1741 * Either OPCODE_CONT, OPCODE_BRK or OPCODE_BRA will be emitted.
1742 */
1743 static struct prog_instruction *
1744 emit_cont_break_if_true(slang_emit_info *emitInfo, slang_ir_node *n)
1745 {
1746 struct prog_instruction *inst;
1747
1748 assert(n->Opcode == IR_CONT_IF_TRUE ||
1749 n->Opcode == IR_BREAK_IF_TRUE);
1750
1751 /* evaluate condition expr, setting cond codes */
1752 inst = emit(emitInfo, n->Children[0]);
1753 if (emitInfo->EmitCondCodes) {
1754 assert(inst);
1755 inst->CondUpdate = GL_TRUE;
1756 }
1757
1758 n->InstLocation = emitInfo->prog->NumInstructions;
1759
1760 /* opcode selection */
1761 if (emitInfo->EmitHighLevelInstructions) {
1762 const gl_inst_opcode opcode
1763 = (n->Opcode == IR_CONT_IF_TRUE) ? OPCODE_CONT : OPCODE_BRK;
1764 if (emitInfo->EmitCondCodes) {
1765 /* Get the writemask from the previous instruction which set
1766 * the condcodes. Use that writemask as the CondSwizzle.
1767 */
1768 const GLuint condWritemask = inst->DstReg.WriteMask;
1769 inst = new_instruction(emitInfo, opcode);
1770 inst->DstReg.CondMask = COND_NE;
1771 inst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1772 return inst;
1773 }
1774 else {
1775 /* IF reg
1776 * BRK/CONT;
1777 * ENDIF
1778 */
1779 GLint ifInstLoc;
1780 ifInstLoc = emitInfo->prog->NumInstructions;
1781 inst = emit_instruction(emitInfo, OPCODE_IF,
1782 NULL, /* dest */
1783 n->Children[0]->Store,
1784 NULL,
1785 NULL);
1786 n->InstLocation = emitInfo->prog->NumInstructions;
1787
1788 inst = new_instruction(emitInfo, opcode);
1789 inst = new_instruction(emitInfo, OPCODE_ENDIF);
1790
1791 emitInfo->prog->Instructions[ifInstLoc].BranchTarget
1792 = emitInfo->prog->NumInstructions;
1793 return inst;
1794 }
1795 }
1796 else {
1797 const GLuint condWritemask = inst->DstReg.WriteMask;
1798 assert(emitInfo->EmitCondCodes);
1799 inst = new_instruction(emitInfo, OPCODE_BRA);
1800 inst->DstReg.CondMask = COND_NE;
1801 inst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1802 return inst;
1803 }
1804 }
1805
1806
1807 static struct prog_instruction *
1808 emit_swizzle(slang_emit_info *emitInfo, slang_ir_node *n)
1809 {
1810 struct prog_instruction *inst;
1811
1812 inst = emit(emitInfo, n->Children[0]);
1813
1814 #if 0
1815 assert(n->Store->Parent);
1816 /* Apply this node's swizzle to parent's storage */
1817 GLuint swizzle = n->Store->Swizzle;
1818 _slang_copy_ir_storage(n->Store, n->Store->Parent);
1819 n->Store->Swizzle = _slang_swizzle_swizzle(n->Store->Swizzle, swizzle);
1820 assert(!n->Store->Parent);
1821 #endif
1822 return inst;
1823 }
1824
1825
1826 /**
1827 * Dereference array element: element == array[index]
1828 * This basically involves emitting code for computing the array index
1829 * and updating the node/element's storage info.
1830 */
1831 static struct prog_instruction *
1832 emit_array_element(slang_emit_info *emitInfo, slang_ir_node *n)
1833 {
1834 slang_ir_storage *arrayStore, *indexStore;
1835 const int elemSize = n->Store->Size; /* number of floats */
1836 const GLint elemSizeVec = (elemSize + 3) / 4; /* number of vec4 */
1837 struct prog_instruction *inst;
1838
1839 assert(n->Opcode == IR_ELEMENT);
1840 assert(elemSize > 0);
1841
1842 /* special case for built-in state variables, like light state */
1843 {
1844 slang_ir_storage *root = n->Store;
1845 assert(!root->Parent);
1846 while (root->Parent)
1847 root = root->Parent;
1848
1849 if (root->File == PROGRAM_STATE_VAR) {
1850 GLboolean direct;
1851 GLint index =
1852 _slang_alloc_statevar(n, emitInfo->prog->Parameters, &direct);
1853 if (index < 0) {
1854 /* error */
1855 return NULL;
1856 }
1857 if (direct) {
1858 n->Store->Index = index;
1859 return NULL; /* all done */
1860 }
1861 }
1862 }
1863
1864 /* do codegen for array itself */
1865 emit(emitInfo, n->Children[0]);
1866 arrayStore = n->Children[0]->Store;
1867
1868 /* The initial array element storage is the array's storage,
1869 * then modified below.
1870 */
1871 _slang_copy_ir_storage(n->Store, arrayStore);
1872
1873
1874 if (n->Children[1]->Opcode == IR_FLOAT) {
1875 /* Constant array index */
1876 const GLint element = (GLint) n->Children[1]->Value[0];
1877
1878 /* this element's storage is the array's storage, plus constant offset */
1879 n->Store->Index += elemSizeVec * element;
1880 }
1881 else {
1882 /* Variable array index */
1883
1884 /* do codegen for array index expression */
1885 emit(emitInfo, n->Children[1]);
1886 indexStore = n->Children[1]->Store;
1887
1888 if (indexStore->IsIndirect) {
1889 /* need to put the array index into a temporary since we can't
1890 * directly support a[b[i]] constructs.
1891 */
1892
1893
1894 /*indexStore = tempstore();*/
1895 }
1896
1897
1898 if (elemSize > 4) {
1899 /* need to multiply array index by array element size */
1900 struct prog_instruction *inst;
1901 slang_ir_storage *indexTemp;
1902 slang_ir_storage elemSizeStore;
1903
1904 /* allocate 1 float indexTemp */
1905 indexTemp = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, 1);
1906 _slang_alloc_temp(emitInfo->vt, indexTemp);
1907
1908 /* allocate a constant containing the element size */
1909 constant_to_storage(emitInfo, (float) elemSizeVec, &elemSizeStore);
1910
1911 /* multiply array index by element size */
1912 inst = emit_instruction(emitInfo,
1913 OPCODE_MUL,
1914 indexTemp, /* dest */
1915 indexStore, /* the index */
1916 &elemSizeStore,
1917 NULL);
1918
1919 indexStore = indexTemp;
1920 }
1921
1922 if (arrayStore->IsIndirect) {
1923 /* ex: in a[i][j], a[i] (the arrayStore) is indirect */
1924 /* Need to add indexStore to arrayStore->Indirect store */
1925 slang_ir_storage indirectArray;
1926 slang_ir_storage *indexTemp;
1927
1928 _slang_init_ir_storage(&indirectArray,
1929 arrayStore->IndirectFile,
1930 arrayStore->IndirectIndex,
1931 1,
1932 arrayStore->IndirectSwizzle);
1933
1934 /* allocate 1 float indexTemp */
1935 indexTemp = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, 1);
1936 _slang_alloc_temp(emitInfo->vt, indexTemp);
1937
1938 inst = emit_instruction(emitInfo,
1939 OPCODE_ADD,
1940 indexTemp, /* dest */
1941 indexStore, /* the index */
1942 &indirectArray, /* indirect array base */
1943 NULL);
1944
1945 indexStore = indexTemp;
1946 }
1947
1948 /* update the array element storage info */
1949 n->Store->IsIndirect = GL_TRUE;
1950 n->Store->IndirectFile = indexStore->File;
1951 n->Store->IndirectIndex = indexStore->Index;
1952 n->Store->IndirectSwizzle = indexStore->Swizzle;
1953 }
1954
1955 n->Store->Size = elemSize;
1956 n->Store->Swizzle = _slang_var_swizzle(elemSize, 0);
1957
1958 return NULL; /* no instruction */
1959 }
1960
1961
1962 /**
1963 * Resolve storage for accessing a structure field.
1964 */
1965 static struct prog_instruction *
1966 emit_struct_field(slang_emit_info *emitInfo, slang_ir_node *n)
1967 {
1968 slang_ir_storage *root = n->Store;
1969 GLint fieldOffset, fieldSize;
1970
1971 assert(n->Opcode == IR_FIELD);
1972
1973 assert(!root->Parent);
1974 while (root->Parent)
1975 root = root->Parent;
1976
1977 /* If this is the field of a state var, allocate constant/uniform
1978 * storage for it now if we haven't already.
1979 * Note that we allocate storage (uniform/constant slots) for state
1980 * variables here rather than at declaration time so we only allocate
1981 * space for the ones that we actually use!
1982 */
1983 if (root->File == PROGRAM_STATE_VAR) {
1984 GLboolean direct;
1985 GLint index = _slang_alloc_statevar(n, emitInfo->prog->Parameters, &direct);
1986 if (index < 0) {
1987 slang_info_log_error(emitInfo->log, "Error parsing state variable");
1988 return NULL;
1989 }
1990 if (direct) {
1991 root->Index = index;
1992 return NULL; /* all done */
1993 }
1994 }
1995
1996 /* do codegen for struct */
1997 emit(emitInfo, n->Children[0]);
1998 assert(n->Children[0]->Store->Index >= 0);
1999
2000
2001 fieldOffset = n->Store->Index;
2002 fieldSize = n->Store->Size;
2003
2004 _slang_copy_ir_storage(n->Store, n->Children[0]->Store);
2005
2006 n->Store->Index = n->Children[0]->Store->Index + fieldOffset / 4;
2007 n->Store->Size = fieldSize;
2008
2009 switch (fieldSize) {
2010 case 1:
2011 {
2012 GLint swz = fieldOffset % 4;
2013 n->Store->Swizzle = MAKE_SWIZZLE4(swz, swz, swz, swz);
2014 }
2015 break;
2016 case 2:
2017 n->Store->Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
2018 SWIZZLE_NIL, SWIZZLE_NIL);
2019 break;
2020 case 3:
2021 n->Store->Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
2022 SWIZZLE_Z, SWIZZLE_NIL);
2023 break;
2024 default:
2025 n->Store->Swizzle = SWIZZLE_XYZW;
2026 }
2027
2028 assert(n->Store->Index >= 0);
2029
2030 return NULL; /* no instruction */
2031 }
2032
2033
2034 /**
2035 * Emit code for a variable declaration.
2036 * This usually doesn't result in any code generation, but just
2037 * memory allocation.
2038 */
2039 static struct prog_instruction *
2040 emit_var_decl(slang_emit_info *emitInfo, slang_ir_node *n)
2041 {
2042 assert(n->Store);
2043 assert(n->Store->File != PROGRAM_UNDEFINED);
2044 assert(n->Store->Size > 0);
2045 /*assert(n->Store->Index < 0);*/
2046
2047 if (!n->Var || n->Var->isTemp) {
2048 /* a nameless/temporary variable, will be freed after first use */
2049 /*NEW*/
2050 if (n->Store->Index < 0 && !_slang_alloc_temp(emitInfo->vt, n->Store)) {
2051 slang_info_log_error(emitInfo->log,
2052 "Ran out of registers, too many temporaries");
2053 return NULL;
2054 }
2055 }
2056 else {
2057 /* a regular variable */
2058 _slang_add_variable(emitInfo->vt, n->Var);
2059 if (!_slang_alloc_var(emitInfo->vt, n->Store)) {
2060 slang_info_log_error(emitInfo->log,
2061 "Ran out of registers, too many variables");
2062 return NULL;
2063 }
2064 /*
2065 printf("IR_VAR_DECL %s %d store %p\n",
2066 (char*) n->Var->a_name, n->Store->Index, (void*) n->Store);
2067 */
2068 assert(n->Var->store == n->Store);
2069 }
2070 if (emitInfo->EmitComments) {
2071 /* emit NOP with comment describing the variable's storage location */
2072 char s[1000];
2073 sprintf(s, "TEMP[%d]%s = variable %s (size %d)",
2074 n->Store->Index,
2075 _mesa_swizzle_string(n->Store->Swizzle, 0, GL_FALSE),
2076 (n->Var ? (char *) n->Var->a_name : "anonymous"),
2077 n->Store->Size);
2078 emit_comment(emitInfo, s);
2079 }
2080 return NULL;
2081 }
2082
2083
2084 /**
2085 * Emit code for a reference to a variable.
2086 * Actually, no code is generated but we may do some memory allocation.
2087 * In particular, state vars (uniforms) are allocated on an as-needed basis.
2088 */
2089 static struct prog_instruction *
2090 emit_var_ref(slang_emit_info *emitInfo, slang_ir_node *n)
2091 {
2092 assert(n->Store);
2093 assert(n->Store->File != PROGRAM_UNDEFINED);
2094
2095 if (n->Store->File == PROGRAM_STATE_VAR && n->Store->Index < 0) {
2096 GLboolean direct;
2097 GLint index = _slang_alloc_statevar(n, emitInfo->prog->Parameters, &direct);
2098 if (index < 0) {
2099 /* error */
2100 char s[100];
2101 snprintf(s, sizeof(s), "Undefined variable '%s'",
2102 (char *) n->Var->a_name);
2103 slang_info_log_error(emitInfo->log, s);
2104 return NULL;
2105 }
2106
2107 n->Store->Index = index;
2108 }
2109 else if (n->Store->File == PROGRAM_UNIFORM ||
2110 n->Store->File == PROGRAM_SAMPLER) {
2111 /* mark var as used */
2112 _mesa_use_uniform(emitInfo->prog->Parameters, (char *) n->Var->a_name);
2113 }
2114
2115 if (n->Store->Index < 0) {
2116 /* probably ran out of registers */
2117 return NULL;
2118 }
2119 assert(n->Store->Size > 0);
2120
2121 return NULL;
2122 }
2123
2124
2125 static struct prog_instruction *
2126 emit(slang_emit_info *emitInfo, slang_ir_node *n)
2127 {
2128 struct prog_instruction *inst;
2129 if (!n)
2130 return NULL;
2131
2132 if (emitInfo->log->error_flag) {
2133 return NULL;
2134 }
2135
2136 switch (n->Opcode) {
2137 case IR_SEQ:
2138 /* sequence of two sub-trees */
2139 assert(n->Children[0]);
2140 assert(n->Children[1]);
2141 emit(emitInfo, n->Children[0]);
2142 if (emitInfo->log->error_flag)
2143 return NULL;
2144 inst = emit(emitInfo, n->Children[1]);
2145 #if 0
2146 assert(!n->Store);
2147 #endif
2148 n->Store = n->Children[1]->Store;
2149 return inst;
2150
2151 case IR_SCOPE:
2152 /* new variable scope */
2153 _slang_push_var_table(emitInfo->vt);
2154 inst = emit(emitInfo, n->Children[0]);
2155 _slang_pop_var_table(emitInfo->vt);
2156 return inst;
2157
2158 case IR_VAR_DECL:
2159 /* Variable declaration - allocate a register for it */
2160 inst = emit_var_decl(emitInfo, n);
2161 return inst;
2162
2163 case IR_VAR:
2164 /* Reference to a variable
2165 * Storage should have already been resolved/allocated.
2166 */
2167 return emit_var_ref(emitInfo, n);
2168
2169 case IR_ELEMENT:
2170 return emit_array_element(emitInfo, n);
2171 case IR_FIELD:
2172 return emit_struct_field(emitInfo, n);
2173 case IR_SWIZZLE:
2174 return emit_swizzle(emitInfo, n);
2175
2176 /* Simple arithmetic */
2177 /* unary */
2178 case IR_MOVE:
2179 case IR_RSQ:
2180 case IR_RCP:
2181 case IR_FLOOR:
2182 case IR_FRAC:
2183 case IR_F_TO_I:
2184 case IR_I_TO_F:
2185 case IR_ABS:
2186 case IR_SIN:
2187 case IR_COS:
2188 case IR_DDX:
2189 case IR_DDY:
2190 case IR_EXP:
2191 case IR_EXP2:
2192 case IR_LOG2:
2193 case IR_NOISE1:
2194 case IR_NOISE2:
2195 case IR_NOISE3:
2196 case IR_NOISE4:
2197 /* binary */
2198 case IR_ADD:
2199 case IR_SUB:
2200 case IR_MUL:
2201 case IR_DOT4:
2202 case IR_DOT3:
2203 case IR_CROSS:
2204 case IR_MIN:
2205 case IR_MAX:
2206 case IR_SEQUAL:
2207 case IR_SNEQUAL:
2208 case IR_SGE:
2209 case IR_SGT:
2210 case IR_SLE:
2211 case IR_SLT:
2212 case IR_POW:
2213 /* trinary operators */
2214 case IR_LRP:
2215 return emit_arith(emitInfo, n);
2216
2217 case IR_EQUAL:
2218 case IR_NOTEQUAL:
2219 return emit_compare(emitInfo, n);
2220
2221 case IR_CLAMP:
2222 return emit_clamp(emitInfo, n);
2223 case IR_TEX:
2224 case IR_TEXB:
2225 case IR_TEXP:
2226 return emit_tex(emitInfo, n);
2227 case IR_NEG:
2228 return emit_negation(emitInfo, n);
2229 case IR_FLOAT:
2230 /* find storage location for this float constant */
2231 n->Store->Index = _mesa_add_unnamed_constant(emitInfo->prog->Parameters,
2232 n->Value,
2233 n->Store->Size,
2234 &n->Store->Swizzle);
2235 if (n->Store->Index < 0) {
2236 slang_info_log_error(emitInfo->log, "Ran out of space for constants");
2237 return NULL;
2238 }
2239 return NULL;
2240
2241 case IR_COPY:
2242 return emit_copy(emitInfo, n);
2243
2244 case IR_COND:
2245 return emit_cond(emitInfo, n);
2246
2247 case IR_NOT:
2248 return emit_not(emitInfo, n);
2249
2250 case IR_LABEL:
2251 return emit_label(emitInfo, n);
2252
2253 case IR_KILL:
2254 return emit_kill(emitInfo);
2255
2256 case IR_CALL:
2257 /* new variable scope for subroutines/function calls */
2258 _slang_push_var_table(emitInfo->vt);
2259 inst = emit_fcall(emitInfo, n);
2260 _slang_pop_var_table(emitInfo->vt);
2261 return inst;
2262
2263 case IR_IF:
2264 return emit_if(emitInfo, n);
2265
2266 case IR_LOOP:
2267 return emit_loop(emitInfo, n);
2268 case IR_BREAK_IF_TRUE:
2269 case IR_CONT_IF_TRUE:
2270 return emit_cont_break_if_true(emitInfo, n);
2271 case IR_BREAK:
2272 /* fall-through */
2273 case IR_CONT:
2274 return emit_cont_break(emitInfo, n);
2275
2276 case IR_BEGIN_SUB:
2277 return new_instruction(emitInfo, OPCODE_BGNSUB);
2278 case IR_END_SUB:
2279 return new_instruction(emitInfo, OPCODE_ENDSUB);
2280 case IR_RETURN:
2281 return emit_return(emitInfo, n);
2282
2283 case IR_NOP:
2284 return NULL;
2285
2286 default:
2287 _mesa_problem(NULL, "Unexpected IR opcode in emit()\n");
2288 }
2289 return NULL;
2290 }
2291
2292
2293 /**
2294 * After code generation, any subroutines will be in separate program
2295 * objects. This function appends all the subroutines onto the main
2296 * program and resolves the linking of all the branch/call instructions.
2297 * XXX this logic should really be part of the linking process...
2298 */
2299 static void
2300 _slang_resolve_subroutines(slang_emit_info *emitInfo)
2301 {
2302 GET_CURRENT_CONTEXT(ctx);
2303 struct gl_program *mainP = emitInfo->prog;
2304 GLuint *subroutineLoc, i, total;
2305
2306 subroutineLoc
2307 = (GLuint *) _mesa_malloc(emitInfo->NumSubroutines * sizeof(GLuint));
2308
2309 /* total number of instructions */
2310 total = mainP->NumInstructions;
2311 for (i = 0; i < emitInfo->NumSubroutines; i++) {
2312 subroutineLoc[i] = total;
2313 total += emitInfo->Subroutines[i]->NumInstructions;
2314 }
2315
2316 /* adjust BranchTargets within the functions */
2317 for (i = 0; i < emitInfo->NumSubroutines; i++) {
2318 struct gl_program *sub = emitInfo->Subroutines[i];
2319 GLuint j;
2320 for (j = 0; j < sub->NumInstructions; j++) {
2321 struct prog_instruction *inst = sub->Instructions + j;
2322 if (inst->Opcode != OPCODE_CAL && inst->BranchTarget >= 0) {
2323 inst->BranchTarget += subroutineLoc[i];
2324 }
2325 }
2326 }
2327
2328 /* append subroutines' instructions after main's instructions */
2329 mainP->Instructions = _mesa_realloc_instructions(mainP->Instructions,
2330 mainP->NumInstructions,
2331 total);
2332 mainP->NumInstructions = total;
2333 for (i = 0; i < emitInfo->NumSubroutines; i++) {
2334 struct gl_program *sub = emitInfo->Subroutines[i];
2335 _mesa_copy_instructions(mainP->Instructions + subroutineLoc[i],
2336 sub->Instructions,
2337 sub->NumInstructions);
2338 /* delete subroutine code */
2339 sub->Parameters = NULL; /* prevent double-free */
2340 _mesa_reference_program(ctx, &emitInfo->Subroutines[i], NULL);
2341 }
2342
2343 /* free subroutine list */
2344 if (emitInfo->Subroutines) {
2345 _mesa_free(emitInfo->Subroutines);
2346 emitInfo->Subroutines = NULL;
2347 }
2348 emitInfo->NumSubroutines = 0;
2349
2350 /* Examine CAL instructions.
2351 * At this point, the BranchTarget field of the CAL instruction is
2352 * the number/id of the subroutine to call (an index into the
2353 * emitInfo->Subroutines list).
2354 * Translate that into an actual instruction location now.
2355 */
2356 for (i = 0; i < mainP->NumInstructions; i++) {
2357 struct prog_instruction *inst = mainP->Instructions + i;
2358 if (inst->Opcode == OPCODE_CAL) {
2359 const GLuint f = inst->BranchTarget;
2360 inst->BranchTarget = subroutineLoc[f];
2361 }
2362 }
2363
2364 _mesa_free(subroutineLoc);
2365 }
2366
2367
2368
2369
2370 GLboolean
2371 _slang_emit_code(slang_ir_node *n, slang_var_table *vt,
2372 struct gl_program *prog, GLboolean withEnd,
2373 slang_info_log *log)
2374 {
2375 GET_CURRENT_CONTEXT(ctx);
2376 GLboolean success;
2377 slang_emit_info emitInfo;
2378 GLuint maxUniforms;
2379
2380 emitInfo.log = log;
2381 emitInfo.vt = vt;
2382 emitInfo.prog = prog;
2383 emitInfo.Subroutines = NULL;
2384 emitInfo.NumSubroutines = 0;
2385 emitInfo.MaxInstructions = prog->NumInstructions;
2386
2387 emitInfo.EmitHighLevelInstructions = ctx->Shader.EmitHighLevelInstructions;
2388 emitInfo.EmitCondCodes = ctx->Shader.EmitCondCodes;
2389 emitInfo.EmitComments = ctx->Shader.EmitComments;
2390 emitInfo.EmitBeginEndSub = GL_TRUE;
2391
2392 if (!emitInfo.EmitCondCodes) {
2393 emitInfo.EmitHighLevelInstructions = GL_TRUE;
2394 }
2395
2396 /* Check uniform/constant limits */
2397 if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
2398 maxUniforms = ctx->Const.FragmentProgram.MaxUniformComponents / 4;
2399 }
2400 else {
2401 assert(prog->Target == GL_VERTEX_PROGRAM_ARB);
2402 maxUniforms = ctx->Const.VertexProgram.MaxUniformComponents / 4;
2403 }
2404 if (prog->Parameters->NumParameters > maxUniforms) {
2405 slang_info_log_error(log, "Constant/uniform register limit exceeded");
2406 return GL_FALSE;
2407 }
2408
2409 (void) emit(&emitInfo, n);
2410
2411 /* finish up by adding the END opcode to program */
2412 if (withEnd) {
2413 struct prog_instruction *inst;
2414 inst = new_instruction(&emitInfo, OPCODE_END);
2415 }
2416
2417 _slang_resolve_subroutines(&emitInfo);
2418
2419 success = GL_TRUE;
2420
2421 #if 0
2422 printf("*********** End emit code (%u inst):\n", prog->NumInstructions);
2423 _mesa_print_program(prog);
2424 _mesa_print_program_parameters(ctx,prog);
2425 #endif
2426
2427 return success;
2428 }