Merge commit 'origin/master' into gallium-0.2
[mesa.git] / src / mesa / shader / slang / slang_emit.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2005-2008 Brian Paul All Rights Reserved.
5 * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file slang_emit.c
27 * Emit program instructions (PI code) from IR trees.
28 * \author Brian Paul
29 */
30
31 /***
32 *** NOTES
33 ***
34 *** To emit GPU instructions, we basically just do an in-order traversal
35 *** of the IR tree.
36 ***/
37
38
39 #include "main/imports.h"
40 #include "main/context.h"
41 #include "main/macros.h"
42 #include "shader/program.h"
43 #include "shader/prog_instruction.h"
44 #include "shader/prog_parameter.h"
45 #include "shader/prog_print.h"
46 #include "slang_builtin.h"
47 #include "slang_emit.h"
48 #include "slang_mem.h"
49
50
51 #define PEEPHOLE_OPTIMIZATIONS 1
52 #define ANNOTATE 0
53
54
/**
 * State shared by the code-emit helpers in this file while instructions
 * are being generated from an IR tree.
 */
typedef struct
{
   slang_info_log *log;   /**< receives error messages (slang_info_log_error) */
   slang_var_table *vt;   /**< variable table used to alloc/free temp registers */
   struct gl_program *prog;   /**< program currently receiving instructions */
   struct gl_program **Subroutines;   /**< array grown by new_subroutine() */
   GLuint NumSubroutines;   /**< number of entries in Subroutines[] */

   GLuint MaxInstructions;  /**< size of prog->Instructions[] buffer */

   /* code-gen options */
   GLboolean EmitHighLevelInstructions;
   GLboolean EmitCondCodes;
   GLboolean EmitComments;
   /* when set, emit_fcall() brackets each subroutine with BGNSUB/ENDSUB */
   GLboolean EmitBeginEndSub;  /* XXX TEMPORARY */
} slang_emit_info;
71
72
73
74 static struct gl_program *
75 new_subroutine(slang_emit_info *emitInfo, GLuint *id)
76 {
77 GET_CURRENT_CONTEXT(ctx);
78 const GLuint n = emitInfo->NumSubroutines;
79
80 emitInfo->Subroutines = (struct gl_program **)
81 _mesa_realloc(emitInfo->Subroutines,
82 n * sizeof(struct gl_program),
83 (n + 1) * sizeof(struct gl_program));
84 emitInfo->Subroutines[n] = ctx->Driver.NewProgram(ctx, emitInfo->prog->Target, 0);
85 emitInfo->Subroutines[n]->Parameters = emitInfo->prog->Parameters;
86 emitInfo->NumSubroutines++;
87 *id = n;
88 return emitInfo->Subroutines[n];
89 }
90
91
92 /**
93 * Convert a writemask to a swizzle. Used for testing cond codes because
94 * we only want to test the cond code component(s) that was set by the
95 * previous instruction.
96 */
97 static GLuint
98 writemask_to_swizzle(GLuint writemask)
99 {
100 if (writemask == WRITEMASK_X)
101 return SWIZZLE_XXXX;
102 if (writemask == WRITEMASK_Y)
103 return SWIZZLE_YYYY;
104 if (writemask == WRITEMASK_Z)
105 return SWIZZLE_ZZZZ;
106 if (writemask == WRITEMASK_W)
107 return SWIZZLE_WWWW;
108 return SWIZZLE_XYZW; /* shouldn't be hit */
109 }
110
111
112 /**
113 * Convert a swizzle mask to a writemask.
114 * Note that the slang_ir_storage->Swizzle field can represent either a
115 * swizzle mask or a writemask, depending on how it's used. For example,
116 * when we parse "direction.yz" alone, we don't know whether .yz is a
117 * writemask or a swizzle. In this case, we encode ".yz" in store->Swizzle
118 * as a swizzle mask (.yz?? actually). Later, if direction.yz is used as
119 * an R-value, we use store->Swizzle as-is. Otherwise, if direction.yz is
120 * used as an L-value, we convert it to a writemask.
121 */
122 static GLuint
123 swizzle_to_writemask(GLuint swizzle)
124 {
125 GLuint i, writemask = 0x0;
126 for (i = 0; i < 4; i++) {
127 GLuint swz = GET_SWZ(swizzle, i);
128 if (swz <= SWIZZLE_W) {
129 writemask |= (1 << swz);
130 }
131 }
132 return writemask;
133 }
134
135
136 /**
137 * Swizzle a swizzle (function composition).
138 * That is, return swz2(swz1), or said another way: swz1.szw2
139 * Example: swizzle_swizzle(".zwxx", ".xxyw") yields ".zzwx"
140 */
141 GLuint
142 _slang_swizzle_swizzle(GLuint swz1, GLuint swz2)
143 {
144 GLuint i, swz, s[4];
145 for (i = 0; i < 4; i++) {
146 GLuint c = GET_SWZ(swz2, i);
147 if (c <= SWIZZLE_W)
148 s[i] = GET_SWZ(swz1, c);
149 else
150 s[i] = c;
151 }
152 swz = MAKE_SWIZZLE4(s[0], s[1], s[2], s[3]);
153 return swz;
154 }
155
156
157 /**
158 * Return the default swizzle mask for accessing a variable of the
159 * given size (in floats). If size = 1, comp is used to identify
160 * which component [0..3] of the register holds the variable.
161 */
162 GLuint
163 _slang_var_swizzle(GLint size, GLint comp)
164 {
165 switch (size) {
166 case 1:
167 return MAKE_SWIZZLE4(comp, comp, comp, comp);
168 case 2:
169 return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_NIL, SWIZZLE_NIL);
170 case 3:
171 return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_NIL);
172 default:
173 return SWIZZLE_XYZW;
174 }
175 }
176
177
178
179 /**
180 * Allocate storage for the given node (if it hasn't already been allocated).
181 *
182 * Typically this is temporary storage for an intermediate result (such as
183 * for a multiply or add, etc).
184 *
185 * If n->Store does not exist it will be created and will be of the size
186 * specified by defaultSize.
187 */
188 static GLboolean
189 alloc_node_storage(slang_emit_info *emitInfo, slang_ir_node *n,
190 GLint defaultSize)
191 {
192 assert(!n->Var);
193 if (!n->Store) {
194 assert(defaultSize > 0);
195 n->Store = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, defaultSize);
196 }
197
198 /* now allocate actual register(s). I.e. set n->Store->Index >= 0 */
199 if (n->Store->Index < 0) {
200 if (!_slang_alloc_temp(emitInfo->vt, n->Store)) {
201 slang_info_log_error(emitInfo->log,
202 "Ran out of registers, too many temporaries");
203 _slang_free(n->Store);
204 n->Store = NULL;
205 return GL_FALSE;
206 }
207 }
208 return GL_TRUE;
209 }
210
211
212 /**
213 * Free temporary storage, if n->Store is, in fact, temp storage.
214 * Otherwise, no-op.
215 */
216 static void
217 free_node_storage(slang_var_table *vt, slang_ir_node *n)
218 {
219 if (n->Store->File == PROGRAM_TEMPORARY &&
220 n->Store->Index >= 0 &&
221 n->Opcode != IR_SWIZZLE) {
222 if (_slang_is_temp(vt, n->Store)) {
223 _slang_free_temp(vt, n->Store);
224 n->Store->Index = -1;
225 n->Store = NULL; /* XXX this may not be needed */
226 }
227 }
228 }
229
230
231 /**
232 * Helper function to allocate a short-term temporary.
233 * Free it with _slang_free_temp().
234 */
235 static GLboolean
236 alloc_local_temp(slang_emit_info *emitInfo, slang_ir_storage *temp, GLint size)
237 {
238 assert(size >= 1);
239 assert(size <= 4);
240 _mesa_bzero(temp, sizeof(*temp));
241 temp->Size = size;
242 temp->File = PROGRAM_TEMPORARY;
243 temp->Index = -1;
244 return _slang_alloc_temp(emitInfo->vt, temp);
245 }
246
247
248 /**
249 * Remove any SWIZZLE_NIL terms from given swizzle mask.
250 * For a swizzle like .z??? generate .zzzz (replicate single component).
251 * Else, for .wx?? generate .wxzw (insert default component for the position).
252 */
253 static GLuint
254 fix_swizzle(GLuint swizzle)
255 {
256 GLuint c0 = GET_SWZ(swizzle, 0),
257 c1 = GET_SWZ(swizzle, 1),
258 c2 = GET_SWZ(swizzle, 2),
259 c3 = GET_SWZ(swizzle, 3);
260 if (c1 == SWIZZLE_NIL && c2 == SWIZZLE_NIL && c3 == SWIZZLE_NIL) {
261 /* smear first component across all positions */
262 c1 = c2 = c3 = c0;
263 }
264 else {
265 /* insert default swizzle components */
266 if (c0 == SWIZZLE_NIL)
267 c0 = SWIZZLE_X;
268 if (c1 == SWIZZLE_NIL)
269 c1 = SWIZZLE_Y;
270 if (c2 == SWIZZLE_NIL)
271 c2 = SWIZZLE_Z;
272 if (c3 == SWIZZLE_NIL)
273 c3 = SWIZZLE_W;
274 }
275 return MAKE_SWIZZLE4(c0, c1, c2, c3);
276 }
277
278
279
280 /**
281 * Convert IR storage to an instruction dst register.
282 */
283 static void
284 storage_to_dst_reg(struct prog_dst_register *dst, const slang_ir_storage *st)
285 {
286 const GLboolean relAddr = st->RelAddr;
287 const GLint size = st->Size;
288 GLint index = st->Index;
289 GLuint swizzle = st->Swizzle;
290
291 assert(index >= 0);
292 /* if this is storage relative to some parent storage, walk up the tree */
293 while (st->Parent) {
294 st = st->Parent;
295 assert(st->Index >= 0);
296 index += st->Index;
297 swizzle = _slang_swizzle_swizzle(st->Swizzle, swizzle);
298 }
299
300 assert(st->File != PROGRAM_UNDEFINED);
301 dst->File = st->File;
302
303 assert(index >= 0);
304 dst->Index = index;
305
306 assert(size >= 1);
307 assert(size <= 4);
308
309 if (swizzle != SWIZZLE_XYZW) {
310 dst->WriteMask = swizzle_to_writemask(swizzle);
311 }
312 else {
313 GLuint writemask;
314 switch (size) {
315 case 1:
316 writemask = WRITEMASK_X << GET_SWZ(st->Swizzle, 0);
317 break;
318 case 2:
319 writemask = WRITEMASK_XY;
320 break;
321 case 3:
322 writemask = WRITEMASK_XYZ;
323 break;
324 case 4:
325 writemask = WRITEMASK_XYZW;
326 break;
327 default:
328 ; /* error would have been caught above */
329 }
330 dst->WriteMask = writemask;
331 }
332
333 dst->RelAddr = relAddr;
334 }
335
336
337 /**
338 * Convert IR storage to an instruction src register.
339 */
340 static void
341 storage_to_src_reg(struct prog_src_register *src, const slang_ir_storage *st)
342 {
343 const GLboolean relAddr = st->RelAddr;
344 GLint index = st->Index;
345 GLuint swizzle = st->Swizzle;
346
347 /* if this is storage relative to some parent storage, walk up the tree */
348 assert(index >= 0);
349 while (st->Parent) {
350 st = st->Parent;
351 assert(st->Index >= 0);
352 index += st->Index;
353 swizzle = _slang_swizzle_swizzle(fix_swizzle(st->Swizzle), swizzle);
354 }
355
356 assert(st->File >= 0);
357 #if 1 /* XXX temporary */
358 if (st->File == PROGRAM_UNDEFINED) {
359 slang_ir_storage *st0 = (slang_ir_storage *) st;
360 st0->File = PROGRAM_TEMPORARY;
361 }
362 #endif
363 assert(st->File < PROGRAM_UNDEFINED);
364 src->File = st->File;
365
366 assert(index >= 0);
367 src->Index = index;
368
369 swizzle = fix_swizzle(swizzle);
370 assert(GET_SWZ(swizzle, 0) <= SWIZZLE_W);
371 assert(GET_SWZ(swizzle, 1) <= SWIZZLE_W);
372 assert(GET_SWZ(swizzle, 2) <= SWIZZLE_W);
373 assert(GET_SWZ(swizzle, 3) <= SWIZZLE_W);
374 src->Swizzle = swizzle;
375
376 src->RelAddr = relAddr;
377 }
378
379
380 /*
381 * Setup storage pointing to a scalar constant/literal.
382 */
383 static void
384 constant_to_storage(slang_emit_info *emitInfo,
385 GLfloat val,
386 slang_ir_storage *store)
387 {
388 GLuint swizzle;
389 GLint reg;
390 GLfloat value[4];
391
392 value[0] = val;
393 reg = _mesa_add_unnamed_constant(emitInfo->prog->Parameters,
394 value, 1, &swizzle);
395
396 memset(store, 0, sizeof(*store));
397 store->File = PROGRAM_CONSTANT;
398 store->Index = reg;
399 store->Swizzle = swizzle;
400 }
401
402
403 /**
404 * Add new instruction at end of given program.
405 * \param prog the program to append instruction onto
406 * \param opcode opcode for the new instruction
407 * \return pointer to the new instruction
408 */
409 static struct prog_instruction *
410 new_instruction(slang_emit_info *emitInfo, gl_inst_opcode opcode)
411 {
412 struct gl_program *prog = emitInfo->prog;
413 struct prog_instruction *inst;
414
415 #if 0
416 /* print prev inst */
417 if (prog->NumInstructions > 0) {
418 _mesa_print_instruction(prog->Instructions + prog->NumInstructions - 1);
419 }
420 #endif
421 assert(prog->NumInstructions <= emitInfo->MaxInstructions);
422
423 if (prog->NumInstructions == emitInfo->MaxInstructions) {
424 /* grow the instruction buffer */
425 emitInfo->MaxInstructions += 20;
426 prog->Instructions =
427 _mesa_realloc_instructions(prog->Instructions,
428 prog->NumInstructions,
429 emitInfo->MaxInstructions);
430 }
431
432 inst = prog->Instructions + prog->NumInstructions;
433 prog->NumInstructions++;
434 _mesa_init_instructions(inst, 1);
435 inst->Opcode = opcode;
436 inst->BranchTarget = -1; /* invalid */
437 /*
438 printf("New inst %d: %p %s\n", prog->NumInstructions-1,(void*)inst,
439 _mesa_opcode_string(inst->Opcode));
440 */
441 return inst;
442 }
443
444
445 static struct prog_instruction *
446 emit_arl_load(slang_emit_info *emitInfo,
447 enum register_file file, GLint index, GLuint swizzle)
448 {
449 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_ARL);
450 inst->SrcReg[0].File = file;
451 inst->SrcReg[0].Index = index;
452 inst->SrcReg[0].Swizzle = swizzle;
453 inst->DstReg.File = PROGRAM_ADDRESS;
454 inst->DstReg.Index = 0;
455 inst->DstReg.WriteMask = WRITEMASK_X;
456 return inst;
457 }
458
459
460 /**
461 * Emit a new instruction with given opcode, operands.
462 * At this point the instruction may have multiple indirect register
463 * loads/stores. We convert those into ARL loads and address-relative
464 * operands. See comments inside.
465 * At some point in the future we could directly emit indirectly addressed
466 * registers in Mesa GPU instructions.
467 */
468 static struct prog_instruction *
469 emit_instruction(slang_emit_info *emitInfo,
470 gl_inst_opcode opcode,
471 const slang_ir_storage *dst,
472 const slang_ir_storage *src0,
473 const slang_ir_storage *src1,
474 const slang_ir_storage *src2)
475 {
476 struct prog_instruction *inst;
477 GLuint numIndirect = 0;
478 const slang_ir_storage *src[3];
479 slang_ir_storage newSrc[3], newDst;
480 GLuint i;
481 GLboolean isTemp[3];
482
483 isTemp[0] = isTemp[1] = isTemp[2] = GL_FALSE;
484
485 src[0] = src0;
486 src[1] = src1;
487 src[2] = src2;
488
489 /* count up how many operands are indirect loads */
490 for (i = 0; i < 3; i++) {
491 if (src[i] && src[i]->IsIndirect)
492 numIndirect++;
493 }
494 if (dst && dst->IsIndirect)
495 numIndirect++;
496
497 /* Take special steps for indirect register loads.
498 * If we had multiple address registers this would be simpler.
499 * For example, this GLSL code:
500 * x[i] = y[j] + z[k];
501 * would translate into something like:
502 * ARL ADDR.x, i;
503 * ARL ADDR.y, j;
504 * ARL ADDR.z, k;
505 * ADD TEMP[ADDR.x+5], TEMP[ADDR.y+9], TEMP[ADDR.z+4];
506 * But since we currently only have one address register we have to do this:
507 * ARL ADDR.x, i;
508 * MOV t1, TEMP[ADDR.x+9];
509 * ARL ADDR.x, j;
510 * MOV t2, TEMP[ADDR.x+4];
511 * ARL ADDR.x, k;
512 * ADD TEMP[ADDR.x+5], t1, t2;
513 * The code here figures this out...
514 */
515 if (numIndirect > 0) {
516 for (i = 0; i < 3; i++) {
517 if (src[i] && src[i]->IsIndirect) {
518 /* load the ARL register with the indirect register */
519 emit_arl_load(emitInfo,
520 src[i]->IndirectFile,
521 src[i]->IndirectIndex,
522 src[i]->IndirectSwizzle);
523
524 if (numIndirect > 1) {
525 /* Need to load src[i] into a temporary register */
526 slang_ir_storage srcRelAddr;
527 alloc_local_temp(emitInfo, &newSrc[i], src[i]->Size);
528 isTemp[i] = GL_TRUE;
529
530 /* set RelAddr flag on src register */
531 srcRelAddr = *src[i];
532 srcRelAddr.RelAddr = GL_TRUE;
533 srcRelAddr.IsIndirect = GL_FALSE; /* not really needed */
534
535 /* MOV newSrc, srcRelAddr; */
536 inst = emit_instruction(emitInfo,
537 OPCODE_MOV,
538 &newSrc[i],
539 &srcRelAddr,
540 NULL,
541 NULL);
542
543 src[i] = &newSrc[i];
544 }
545 else {
546 /* just rewrite the src[i] storage to be ARL-relative */
547 newSrc[i] = *src[i];
548 newSrc[i].RelAddr = GL_TRUE;
549 newSrc[i].IsIndirect = GL_FALSE; /* not really needed */
550 src[i] = &newSrc[i];
551 }
552 }
553 }
554 }
555
556 /* Take special steps for indirect dest register write */
557 if (dst && dst->IsIndirect) {
558 /* load the ARL register with the indirect register */
559 emit_arl_load(emitInfo,
560 dst->IndirectFile,
561 dst->IndirectIndex,
562 dst->IndirectSwizzle);
563 newDst = *dst;
564 newDst.RelAddr = GL_TRUE;
565 newDst.IsIndirect = GL_FALSE;
566 dst = &newDst;
567 }
568
569 /* OK, emit the instruction and its dst, src regs */
570 inst = new_instruction(emitInfo, opcode);
571 if (!inst)
572 return NULL;
573
574 if (dst)
575 storage_to_dst_reg(&inst->DstReg, dst);
576
577 for (i = 0; i < 3; i++) {
578 if (src[i])
579 storage_to_src_reg(&inst->SrcReg[i], src[i]);
580 }
581
582 /* Free any temp registers that we allocated above */
583 for (i = 0; i < 3; i++) {
584 if (isTemp[i])
585 _slang_free_temp(emitInfo->vt, &newSrc[i]);
586 }
587
588 return inst;
589 }
590
591
592
593 /**
594 * Put a comment on the given instruction.
595 */
596 static void
597 inst_comment(struct prog_instruction *inst, const char *comment)
598 {
599 if (inst)
600 inst->Comment = _mesa_strdup(comment);
601 }
602
603
604
605 /**
606 * Return pointer to last instruction in program.
607 */
608 static struct prog_instruction *
609 prev_instruction(slang_emit_info *emitInfo)
610 {
611 struct gl_program *prog = emitInfo->prog;
612 if (prog->NumInstructions == 0)
613 return NULL;
614 else
615 return prog->Instructions + prog->NumInstructions - 1;
616 }
617
618
619 static struct prog_instruction *
620 emit(slang_emit_info *emitInfo, slang_ir_node *n);
621
622
623 /**
624 * Return an annotation string for given node's storage.
625 */
626 static char *
627 storage_annotation(const slang_ir_node *n, const struct gl_program *prog)
628 {
629 #if ANNOTATE
630 const slang_ir_storage *st = n->Store;
631 static char s[100] = "";
632
633 if (!st)
634 return _mesa_strdup("");
635
636 switch (st->File) {
637 case PROGRAM_CONSTANT:
638 if (st->Index >= 0) {
639 const GLfloat *val = prog->Parameters->ParameterValues[st->Index];
640 if (st->Swizzle == SWIZZLE_NOOP)
641 sprintf(s, "{%g, %g, %g, %g}", val[0], val[1], val[2], val[3]);
642 else {
643 sprintf(s, "%g", val[GET_SWZ(st->Swizzle, 0)]);
644 }
645 }
646 break;
647 case PROGRAM_TEMPORARY:
648 if (n->Var)
649 sprintf(s, "%s", (char *) n->Var->a_name);
650 else
651 sprintf(s, "t[%d]", st->Index);
652 break;
653 case PROGRAM_STATE_VAR:
654 case PROGRAM_UNIFORM:
655 sprintf(s, "%s", prog->Parameters->Parameters[st->Index].Name);
656 break;
657 case PROGRAM_VARYING:
658 sprintf(s, "%s", prog->Varying->Parameters[st->Index].Name);
659 break;
660 case PROGRAM_INPUT:
661 sprintf(s, "input[%d]", st->Index);
662 break;
663 case PROGRAM_OUTPUT:
664 sprintf(s, "output[%d]", st->Index);
665 break;
666 default:
667 s[0] = 0;
668 }
669 return _mesa_strdup(s);
670 #else
671 return NULL;
672 #endif
673 }
674
675
676 /**
677 * Return an annotation string for an instruction.
678 */
679 static char *
680 instruction_annotation(gl_inst_opcode opcode, char *dstAnnot,
681 char *srcAnnot0, char *srcAnnot1, char *srcAnnot2)
682 {
683 #if ANNOTATE
684 const char *operator;
685 char *s;
686 int len = 50;
687
688 if (dstAnnot)
689 len += strlen(dstAnnot);
690 else
691 dstAnnot = _mesa_strdup("");
692
693 if (srcAnnot0)
694 len += strlen(srcAnnot0);
695 else
696 srcAnnot0 = _mesa_strdup("");
697
698 if (srcAnnot1)
699 len += strlen(srcAnnot1);
700 else
701 srcAnnot1 = _mesa_strdup("");
702
703 if (srcAnnot2)
704 len += strlen(srcAnnot2);
705 else
706 srcAnnot2 = _mesa_strdup("");
707
708 switch (opcode) {
709 case OPCODE_ADD:
710 operator = "+";
711 break;
712 case OPCODE_SUB:
713 operator = "-";
714 break;
715 case OPCODE_MUL:
716 operator = "*";
717 break;
718 case OPCODE_DP2:
719 operator = "DP2";
720 break;
721 case OPCODE_DP3:
722 operator = "DP3";
723 break;
724 case OPCODE_DP4:
725 operator = "DP4";
726 break;
727 case OPCODE_XPD:
728 operator = "XPD";
729 break;
730 case OPCODE_RSQ:
731 operator = "RSQ";
732 break;
733 case OPCODE_SGT:
734 operator = ">";
735 break;
736 default:
737 operator = ",";
738 }
739
740 s = (char *) malloc(len);
741 sprintf(s, "%s = %s %s %s %s", dstAnnot,
742 srcAnnot0, operator, srcAnnot1, srcAnnot2);
743 assert(_mesa_strlen(s) < len);
744
745 free(dstAnnot);
746 free(srcAnnot0);
747 free(srcAnnot1);
748 free(srcAnnot2);
749
750 return s;
751 #else
752 return NULL;
753 #endif
754 }
755
756
757 /**
758 * Emit an instruction that's just a comment.
759 */
760 static struct prog_instruction *
761 emit_comment(slang_emit_info *emitInfo, const char *comment)
762 {
763 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_NOP);
764 inst_comment(inst, comment);
765 return inst;
766 }
767
768
769 /**
770 * Generate code for a simple arithmetic instruction.
771 * Either 1, 2 or 3 operands.
772 */
773 static struct prog_instruction *
774 emit_arith(slang_emit_info *emitInfo, slang_ir_node *n)
775 {
776 const slang_ir_info *info = _slang_ir_info(n->Opcode);
777 struct prog_instruction *inst;
778 GLuint i;
779
780 assert(info);
781 assert(info->InstOpcode != OPCODE_NOP);
782
783 #if PEEPHOLE_OPTIMIZATIONS
784 /* Look for MAD opportunity */
785 if (info->NumParams == 2 &&
786 n->Opcode == IR_ADD && n->Children[0]->Opcode == IR_MUL) {
787 /* found pattern IR_ADD(IR_MUL(A, B), C) */
788 emit(emitInfo, n->Children[0]->Children[0]); /* A */
789 emit(emitInfo, n->Children[0]->Children[1]); /* B */
790 emit(emitInfo, n->Children[1]); /* C */
791 alloc_node_storage(emitInfo, n, -1); /* dest */
792
793 inst = emit_instruction(emitInfo,
794 OPCODE_MAD,
795 n->Store,
796 n->Children[0]->Children[0]->Store,
797 n->Children[0]->Children[1]->Store,
798 n->Children[1]->Store);
799
800 free_node_storage(emitInfo->vt, n->Children[0]->Children[0]);
801 free_node_storage(emitInfo->vt, n->Children[0]->Children[1]);
802 free_node_storage(emitInfo->vt, n->Children[1]);
803 return inst;
804 }
805
806 if (info->NumParams == 2 &&
807 n->Opcode == IR_ADD && n->Children[1]->Opcode == IR_MUL) {
808 /* found pattern IR_ADD(A, IR_MUL(B, C)) */
809 emit(emitInfo, n->Children[0]); /* A */
810 emit(emitInfo, n->Children[1]->Children[0]); /* B */
811 emit(emitInfo, n->Children[1]->Children[1]); /* C */
812 alloc_node_storage(emitInfo, n, -1); /* dest */
813
814 inst = emit_instruction(emitInfo,
815 OPCODE_MAD,
816 n->Store,
817 n->Children[1]->Children[0]->Store,
818 n->Children[1]->Children[1]->Store,
819 n->Children[0]->Store);
820
821 free_node_storage(emitInfo->vt, n->Children[1]->Children[0]);
822 free_node_storage(emitInfo->vt, n->Children[1]->Children[1]);
823 free_node_storage(emitInfo->vt, n->Children[0]);
824 return inst;
825 }
826 #endif
827
828 /* gen code for children, may involve temp allocation */
829 for (i = 0; i < info->NumParams; i++) {
830 emit(emitInfo, n->Children[i]);
831 if (!n->Children[i] || !n->Children[i]->Store) {
832 /* error recovery */
833 return NULL;
834 }
835 }
836
837 /* result storage */
838 alloc_node_storage(emitInfo, n, -1);
839
840 inst = emit_instruction(emitInfo,
841 info->InstOpcode,
842 n->Store, /* dest */
843 (info->NumParams > 0 ? n->Children[0]->Store : NULL),
844 (info->NumParams > 1 ? n->Children[1]->Store : NULL),
845 (info->NumParams > 2 ? n->Children[2]->Store : NULL)
846 );
847
848 /* free temps */
849 for (i = 0; i < info->NumParams; i++)
850 free_node_storage(emitInfo->vt, n->Children[i]);
851
852 return inst;
853 }
854
855
856 /**
857 * Emit code for == and != operators. These could normally be handled
858 * by emit_arith() except we need to be able to handle structure comparisons.
859 */
860 static struct prog_instruction *
861 emit_compare(slang_emit_info *emitInfo, slang_ir_node *n)
862 {
863 struct prog_instruction *inst = NULL;
864 GLint size;
865
866 assert(n->Opcode == IR_EQUAL || n->Opcode == IR_NOTEQUAL);
867
868 /* gen code for children */
869 emit(emitInfo, n->Children[0]);
870 emit(emitInfo, n->Children[1]);
871
872 if (n->Children[0]->Store->Size != n->Children[1]->Store->Size) {
873 slang_info_log_error(emitInfo->log, "invalid operands to == or !=");
874 return NULL;
875 }
876
877 /* final result is 1 bool */
878 if (!alloc_node_storage(emitInfo, n, 1))
879 return NULL;
880
881 size = n->Children[0]->Store->Size;
882
883 if (size == 1) {
884 gl_inst_opcode opcode = n->Opcode == IR_EQUAL ? OPCODE_SEQ : OPCODE_SNE;
885 inst = emit_instruction(emitInfo,
886 opcode,
887 n->Store, /* dest */
888 n->Children[0]->Store,
889 n->Children[1]->Store,
890 NULL);
891 }
892 else if (size <= 4) {
893 /* compare two vectors.
894 * Unfortunately, there's no instruction to compare vectors and
895 * return a scalar result. Do it with some compare and dot product
896 * instructions...
897 */
898 GLuint swizzle;
899 gl_inst_opcode dotOp;
900 slang_ir_storage tempStore;
901
902 if (!alloc_local_temp(emitInfo, &tempStore, 4)) {
903 return NULL;
904 /* out of temps */
905 }
906
907 if (size == 4) {
908 dotOp = OPCODE_DP4;
909 swizzle = SWIZZLE_XYZW;
910 }
911 else if (size == 3) {
912 dotOp = OPCODE_DP3;
913 swizzle = SWIZZLE_XYZW;
914 }
915 else {
916 assert(size == 2);
917 dotOp = OPCODE_DP3; /* XXX use OPCODE_DP2 eventually */
918 swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y);
919 }
920
921 /* Compute inequality (temp = (A != B)) */
922 inst = emit_instruction(emitInfo,
923 OPCODE_SNE,
924 &tempStore,
925 n->Children[0]->Store,
926 n->Children[1]->Store,
927 NULL);
928 inst_comment(inst, "Compare values");
929
930 /* Compute val = DOT(temp, temp) (reduction) */
931 inst = emit_instruction(emitInfo,
932 dotOp,
933 n->Store,
934 &tempStore,
935 &tempStore,
936 NULL);
937 inst->SrcReg[0].Swizzle = inst->SrcReg[1].Swizzle = swizzle; /*override*/
938 inst_comment(inst, "Reduce vec to bool");
939
940 _slang_free_temp(emitInfo->vt, &tempStore); /* free temp */
941
942 if (n->Opcode == IR_EQUAL) {
943 /* compute val = !val.x with SEQ val, val, 0; */
944 slang_ir_storage zero;
945 constant_to_storage(emitInfo, 0.0, &zero);
946 inst = emit_instruction(emitInfo,
947 OPCODE_SEQ,
948 n->Store, /* dest */
949 n->Store,
950 &zero,
951 NULL);
952 inst_comment(inst, "Invert true/false");
953 }
954 }
955 else {
956 /* size > 4, struct or array compare.
957 * XXX this won't work reliably for structs with padding!!
958 */
959 GLint i, num = (n->Children[0]->Store->Size + 3) / 4;
960 slang_ir_storage accTemp, sneTemp;
961
962 if (!alloc_local_temp(emitInfo, &accTemp, 4))
963 return NULL;
964
965 if (!alloc_local_temp(emitInfo, &sneTemp, 4))
966 return NULL;
967
968 for (i = 0; i < num; i++) {
969 slang_ir_storage srcStore0 = *n->Children[0]->Store;
970 slang_ir_storage srcStore1 = *n->Children[1]->Store;
971 srcStore0.Index += i;
972 srcStore1.Index += i;
973
974 if (i == 0) {
975 /* SNE accTemp, left[i], right[i] */
976 inst = emit_instruction(emitInfo, OPCODE_SNE,
977 &accTemp, /* dest */
978 &srcStore0,
979 &srcStore1,
980 NULL);
981 inst_comment(inst, "Begin struct/array comparison");
982 }
983 else {
984 /* SNE sneTemp, left[i], right[i] */
985 inst = emit_instruction(emitInfo, OPCODE_SNE,
986 &sneTemp, /* dest */
987 &srcStore0,
988 &srcStore1,
989 NULL);
990 /* ADD accTemp, accTemp, sneTemp; # like logical-OR */
991 inst = emit_instruction(emitInfo, OPCODE_ADD,
992 &accTemp, /* dest */
993 &accTemp,
994 &sneTemp,
995 NULL);
996 }
997 }
998
999 /* compute accTemp.x || accTemp.y || accTemp.z || accTemp.w with DOT4 */
1000 inst = emit_instruction(emitInfo, OPCODE_DP4,
1001 n->Store,
1002 &accTemp,
1003 &accTemp,
1004 NULL);
1005 inst_comment(inst, "End struct/array comparison");
1006
1007 if (n->Opcode == IR_EQUAL) {
1008 /* compute tmp.x = !tmp.x via tmp.x = (tmp.x == 0) */
1009 slang_ir_storage zero;
1010 constant_to_storage(emitInfo, 0.0, &zero);
1011 inst = emit_instruction(emitInfo, OPCODE_SEQ,
1012 n->Store, /* dest */
1013 n->Store,
1014 &zero,
1015 NULL);
1016 inst_comment(inst, "Invert true/false");
1017 }
1018
1019 _slang_free_temp(emitInfo->vt, &accTemp);
1020 _slang_free_temp(emitInfo->vt, &sneTemp);
1021 }
1022
1023 /* free temps */
1024 free_node_storage(emitInfo->vt, n->Children[0]);
1025 free_node_storage(emitInfo->vt, n->Children[1]);
1026
1027 return inst;
1028 }
1029
1030
1031
1032 /**
1033 * Generate code for an IR_CLAMP instruction.
1034 */
1035 static struct prog_instruction *
1036 emit_clamp(slang_emit_info *emitInfo, slang_ir_node *n)
1037 {
1038 struct prog_instruction *inst;
1039 slang_ir_node tmpNode;
1040
1041 assert(n->Opcode == IR_CLAMP);
1042 /* ch[0] = value
1043 * ch[1] = min limit
1044 * ch[2] = max limit
1045 */
1046
1047 inst = emit(emitInfo, n->Children[0]);
1048
1049 /* If lower limit == 0.0 and upper limit == 1.0,
1050 * set prev instruction's SaturateMode field to SATURATE_ZERO_ONE.
1051 * Else,
1052 * emit OPCODE_MIN, OPCODE_MAX sequence.
1053 */
1054 #if 0
1055 /* XXX this isn't quite finished yet */
1056 if (n->Children[1]->Opcode == IR_FLOAT &&
1057 n->Children[1]->Value[0] == 0.0 &&
1058 n->Children[1]->Value[1] == 0.0 &&
1059 n->Children[1]->Value[2] == 0.0 &&
1060 n->Children[1]->Value[3] == 0.0 &&
1061 n->Children[2]->Opcode == IR_FLOAT &&
1062 n->Children[2]->Value[0] == 1.0 &&
1063 n->Children[2]->Value[1] == 1.0 &&
1064 n->Children[2]->Value[2] == 1.0 &&
1065 n->Children[2]->Value[3] == 1.0) {
1066 if (!inst) {
1067 inst = prev_instruction(prog);
1068 }
1069 if (inst && inst->Opcode != OPCODE_NOP) {
1070 /* and prev instruction's DstReg matches n->Children[0]->Store */
1071 inst->SaturateMode = SATURATE_ZERO_ONE;
1072 n->Store = n->Children[0]->Store;
1073 return inst;
1074 }
1075 }
1076 #endif
1077
1078 if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size))
1079 return NULL;
1080
1081 emit(emitInfo, n->Children[1]);
1082 emit(emitInfo, n->Children[2]);
1083
1084 /* Some GPUs don't allow reading from output registers. So if the
1085 * dest for this clamp() is an output reg, we can't use that reg for
1086 * the intermediate result. Use a temp register instead.
1087 */
1088 _mesa_bzero(&tmpNode, sizeof(tmpNode));
1089 alloc_node_storage(emitInfo, &tmpNode, n->Store->Size);
1090
1091 /* tmp = max(ch[0], ch[1]) */
1092 inst = emit_instruction(emitInfo, OPCODE_MAX,
1093 tmpNode.Store, /* dest */
1094 n->Children[0]->Store,
1095 n->Children[1]->Store,
1096 NULL);
1097
1098 /* n->dest = min(tmp, ch[2]) */
1099 inst = emit_instruction(emitInfo, OPCODE_MIN,
1100 n->Store, /* dest */
1101 tmpNode.Store,
1102 n->Children[2]->Store,
1103 NULL);
1104
1105 free_node_storage(emitInfo->vt, &tmpNode);
1106
1107 return inst;
1108 }
1109
1110
1111 static struct prog_instruction *
1112 emit_negation(slang_emit_info *emitInfo, slang_ir_node *n)
1113 {
1114 /* Implement as MOV dst, -src; */
1115 /* XXX we could look at the previous instruction and in some circumstances
1116 * modify it to accomplish the negation.
1117 */
1118 struct prog_instruction *inst;
1119
1120 emit(emitInfo, n->Children[0]);
1121
1122 if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size))
1123 return NULL;
1124
1125 inst = emit_instruction(emitInfo,
1126 OPCODE_MOV,
1127 n->Store, /* dest */
1128 n->Children[0]->Store,
1129 NULL,
1130 NULL);
1131 inst->SrcReg[0].NegateBase = NEGATE_XYZW;
1132 return inst;
1133 }
1134
1135
1136 static struct prog_instruction *
1137 emit_label(slang_emit_info *emitInfo, const slang_ir_node *n)
1138 {
1139 assert(n->Label);
1140 #if 0
1141 /* XXX this fails in loop tail code - investigate someday */
1142 assert(_slang_label_get_location(n->Label) < 0);
1143 _slang_label_set_location(n->Label, emitInfo->prog->NumInstructions,
1144 emitInfo->prog);
1145 #else
1146 if (_slang_label_get_location(n->Label) < 0)
1147 _slang_label_set_location(n->Label, emitInfo->prog->NumInstructions,
1148 emitInfo->prog);
1149 #endif
1150 return NULL;
1151 }
1152
1153
1154 /**
1155 * Emit code for a function call.
1156 * Note that for each time a function is called, we emit the function's
1157 * body code again because the set of available registers may be different.
1158 */
1159 static struct prog_instruction *
1160 emit_fcall(slang_emit_info *emitInfo, slang_ir_node *n)
1161 {
1162 struct gl_program *progSave;
1163 struct prog_instruction *inst;
1164 GLuint subroutineId;
1165 GLuint maxInstSave;
1166
1167 assert(n->Opcode == IR_CALL);
1168 assert(n->Label);
1169
1170 /* save/push cur program */
1171 maxInstSave = emitInfo->MaxInstructions;
1172 progSave = emitInfo->prog;
1173
1174 emitInfo->prog = new_subroutine(emitInfo, &subroutineId);
1175 emitInfo->MaxInstructions = emitInfo->prog->NumInstructions;
1176
1177 _slang_label_set_location(n->Label, emitInfo->prog->NumInstructions,
1178 emitInfo->prog);
1179
1180 if (emitInfo->EmitBeginEndSub) {
1181 /* BGNSUB isn't a real instruction.
1182 * We require a label (i.e. "foobar:") though, if we're going to
1183 * print the program in the NV format. The BNGSUB instruction is
1184 * really just a NOP to attach the label to.
1185 */
1186 inst = new_instruction(emitInfo, OPCODE_BGNSUB);
1187 inst_comment(inst, n->Label->Name);
1188 }
1189
1190 /* body of function: */
1191 emit(emitInfo, n->Children[0]);
1192 n->Store = n->Children[0]->Store;
1193
1194 /* add RET instruction now, if needed */
1195 inst = prev_instruction(emitInfo);
1196 if (inst && inst->Opcode != OPCODE_RET) {
1197 inst = new_instruction(emitInfo, OPCODE_RET);
1198 }
1199
1200 if (emitInfo->EmitBeginEndSub) {
1201 inst = new_instruction(emitInfo, OPCODE_ENDSUB);
1202 inst_comment(inst, n->Label->Name);
1203 }
1204
1205 /* pop/restore cur program */
1206 emitInfo->prog = progSave;
1207 emitInfo->MaxInstructions = maxInstSave;
1208
1209 /* emit the function call */
1210 inst = new_instruction(emitInfo, OPCODE_CAL);
1211 /* The branch target is just the subroutine number (changed later) */
1212 inst->BranchTarget = subroutineId;
1213 inst_comment(inst, n->Label->Name);
1214 assert(inst->BranchTarget >= 0);
1215
1216 return inst;
1217 }
1218
1219
1220 /**
1221 * Emit code for a 'return' statement.
1222 */
1223 static struct prog_instruction *
1224 emit_return(slang_emit_info *emitInfo, slang_ir_node *n)
1225 {
1226 struct prog_instruction *inst;
1227 assert(n);
1228 assert(n->Opcode == IR_RETURN);
1229 assert(n->Label);
1230 inst = new_instruction(emitInfo, OPCODE_RET);
1231 inst->DstReg.CondMask = COND_TR; /* always return */
1232 return inst;
1233 }
1234
1235
1236 static struct prog_instruction *
1237 emit_kill(slang_emit_info *emitInfo)
1238 {
1239 struct gl_fragment_program *fp;
1240 struct prog_instruction *inst;
1241 /* NV-KILL - discard fragment depending on condition code.
1242 * Note that ARB-KILL depends on sign of vector operand.
1243 */
1244 inst = new_instruction(emitInfo, OPCODE_KIL_NV);
1245 inst->DstReg.CondMask = COND_TR; /* always kill */
1246
1247 assert(emitInfo->prog->Target == GL_FRAGMENT_PROGRAM_ARB);
1248 fp = (struct gl_fragment_program *) emitInfo->prog;
1249 fp->UsesKill = GL_TRUE;
1250
1251 return inst;
1252 }
1253
1254
1255 static struct prog_instruction *
1256 emit_tex(slang_emit_info *emitInfo, slang_ir_node *n)
1257 {
1258 struct prog_instruction *inst;
1259 gl_inst_opcode opcode;
1260
1261 if (n->Opcode == IR_TEX) {
1262 opcode = OPCODE_TEX;
1263 }
1264 else if (n->Opcode == IR_TEXB) {
1265 opcode = OPCODE_TXB;
1266 }
1267 else {
1268 assert(n->Opcode == IR_TEXP);
1269 opcode = OPCODE_TXP;
1270 }
1271
1272 /* emit code for the texcoord operand */
1273 (void) emit(emitInfo, n->Children[1]);
1274
1275 /* alloc storage for result of texture fetch */
1276 if (!alloc_node_storage(emitInfo, n, 4))
1277 return NULL;
1278
1279 /* emit TEX instruction; Child[1] is the texcoord */
1280 inst = emit_instruction(emitInfo,
1281 opcode,
1282 n->Store,
1283 n->Children[1]->Store,
1284 NULL,
1285 NULL);
1286
1287 /* Child[0] is the sampler (a uniform which'll indicate the texture unit) */
1288 assert(n->Children[0]->Store);
1289 /* Store->Index is the sampler index */
1290 assert(n->Children[0]->Store->Index >= 0);
1291 /* Store->Size is the texture target */
1292 assert(n->Children[0]->Store->Size >= TEXTURE_1D_INDEX);
1293 assert(n->Children[0]->Store->Size <= TEXTURE_RECT_INDEX);
1294
1295 inst->TexSrcTarget = n->Children[0]->Store->Size;
1296 inst->TexSrcUnit = n->Children[0]->Store->Index; /* i.e. uniform's index */
1297
1298 return inst;
1299 }
1300
1301
1302 /**
1303 * Assignment/copy
1304 */
1305 static struct prog_instruction *
1306 emit_copy(slang_emit_info *emitInfo, slang_ir_node *n)
1307 {
1308 struct prog_instruction *inst;
1309
1310 assert(n->Opcode == IR_COPY);
1311
1312 /* lhs */
1313 emit(emitInfo, n->Children[0]);
1314 if (!n->Children[0]->Store || n->Children[0]->Store->Index < 0) {
1315 /* an error should have been already recorded */
1316 return NULL;
1317 }
1318
1319 /* rhs */
1320 assert(n->Children[1]);
1321 inst = emit(emitInfo, n->Children[1]);
1322
1323 if (!n->Children[1]->Store || n->Children[1]->Store->Index < 0) {
1324 if (!emitInfo->log->text) {
1325 slang_info_log_error(emitInfo->log, "invalid assignment");
1326 }
1327 return NULL;
1328 }
1329
1330 assert(n->Children[1]->Store->Index >= 0);
1331
1332 /*assert(n->Children[0]->Store->Size == n->Children[1]->Store->Size);*/
1333
1334 n->Store = n->Children[0]->Store;
1335
1336 if (n->Store->File == PROGRAM_SAMPLER) {
1337 /* no code generated for sampler assignments,
1338 * just copy the sampler index at compile time.
1339 */
1340 n->Store->Index = n->Children[1]->Store->Index;
1341 return NULL;
1342 }
1343
1344 #if PEEPHOLE_OPTIMIZATIONS
1345 if (inst &&
1346 _slang_is_temp(emitInfo->vt, n->Children[1]->Store) &&
1347 (inst->DstReg.File == n->Children[1]->Store->File) &&
1348 (inst->DstReg.Index == n->Children[1]->Store->Index) &&
1349 !n->Children[0]->Store->IsIndirect &&
1350 n->Children[0]->Store->Size <= 4) {
1351 /* Peephole optimization:
1352 * The Right-Hand-Side has its results in a temporary place.
1353 * Modify the RHS (and the prev instruction) to store its results
1354 * in the destination specified by n->Children[0].
1355 * Then, this MOVE is a no-op.
1356 * Ex:
1357 * MUL tmp, x, y;
1358 * MOV a, tmp;
1359 * becomes:
1360 * MUL a, x, y;
1361 */
1362 if (n->Children[1]->Opcode != IR_SWIZZLE)
1363 _slang_free_temp(emitInfo->vt, n->Children[1]->Store);
1364 *n->Children[1]->Store = *n->Children[0]->Store;
1365
1366 /* fixup the previous instruction (which stored the RHS result) */
1367 assert(n->Children[0]->Store->Index >= 0);
1368
1369 storage_to_dst_reg(&inst->DstReg, n->Children[0]->Store);
1370 return inst;
1371 }
1372 else
1373 #endif
1374 {
1375 if (n->Children[0]->Store->Size > 4) {
1376 /* move matrix/struct etc (block of registers) */
1377 slang_ir_storage dstStore = *n->Children[0]->Store;
1378 slang_ir_storage srcStore = *n->Children[1]->Store;
1379 GLint size = srcStore.Size;
1380 ASSERT(n->Children[1]->Store->Swizzle == SWIZZLE_NOOP);
1381 dstStore.Size = 4;
1382 srcStore.Size = 4;
1383 while (size >= 4) {
1384 inst = emit_instruction(emitInfo, OPCODE_MOV,
1385 &dstStore,
1386 &srcStore,
1387 NULL,
1388 NULL);
1389 inst_comment(inst, "IR_COPY block");
1390 srcStore.Index++;
1391 dstStore.Index++;
1392 size -= 4;
1393 }
1394 }
1395 else {
1396 /* single register move */
1397 char *srcAnnot, *dstAnnot;
1398 assert(n->Children[0]->Store->Index >= 0);
1399 inst = emit_instruction(emitInfo, OPCODE_MOV,
1400 n->Children[0]->Store, /* dest */
1401 n->Children[1]->Store,
1402 NULL,
1403 NULL);
1404 dstAnnot = storage_annotation(n->Children[0], emitInfo->prog);
1405 srcAnnot = storage_annotation(n->Children[1], emitInfo->prog);
1406 inst->Comment = instruction_annotation(inst->Opcode, dstAnnot,
1407 srcAnnot, NULL, NULL);
1408 }
1409 free_node_storage(emitInfo->vt, n->Children[1]);
1410 return inst;
1411 }
1412 }
1413
1414
1415 /**
1416 * An IR_COND node wraps a boolean expression which is used by an
1417 * IF or WHILE test. This is where we'll set condition codes, if needed.
1418 */
1419 static struct prog_instruction *
1420 emit_cond(slang_emit_info *emitInfo, slang_ir_node *n)
1421 {
1422 struct prog_instruction *inst;
1423
1424 assert(n->Opcode == IR_COND);
1425
1426 if (!n->Children[0])
1427 return NULL;
1428
1429 /* emit code for the expression */
1430 inst = emit(emitInfo, n->Children[0]);
1431
1432 if (!n->Children[0]->Store) {
1433 /* error recovery */
1434 return NULL;
1435 }
1436
1437 assert(n->Children[0]->Store);
1438 /*assert(n->Children[0]->Store->Size == 1);*/
1439
1440 if (emitInfo->EmitCondCodes) {
1441 if (inst &&
1442 n->Children[0]->Store &&
1443 inst->DstReg.File == n->Children[0]->Store->File &&
1444 inst->DstReg.Index == n->Children[0]->Store->Index) {
1445 /* The previous instruction wrote to the register who's value
1446 * we're testing. Just fix that instruction so that the
1447 * condition codes are computed.
1448 */
1449 inst->CondUpdate = GL_TRUE;
1450 n->Store = n->Children[0]->Store;
1451 return inst;
1452 }
1453 else {
1454 /* This'll happen for things like "if (i) ..." where no code
1455 * is normally generated for the expression "i".
1456 * Generate a move instruction just to set condition codes.
1457 */
1458 if (!alloc_node_storage(emitInfo, n, 1))
1459 return NULL;
1460 inst = emit_instruction(emitInfo, OPCODE_MOV,
1461 n->Store, /* dest */
1462 n->Children[0]->Store,
1463 NULL,
1464 NULL);
1465 inst->CondUpdate = GL_TRUE;
1466 inst_comment(inst, "COND expr");
1467 _slang_free_temp(emitInfo->vt, n->Store);
1468 return inst;
1469 }
1470 }
1471 else {
1472 /* No-op: the boolean result of the expression is in a regular reg */
1473 n->Store = n->Children[0]->Store;
1474 return inst;
1475 }
1476 }
1477
1478
1479 /**
1480 * Logical-NOT
1481 */
1482 static struct prog_instruction *
1483 emit_not(slang_emit_info *emitInfo, slang_ir_node *n)
1484 {
1485 static const struct {
1486 gl_inst_opcode op, opNot;
1487 } operators[] = {
1488 { OPCODE_SLT, OPCODE_SGE },
1489 { OPCODE_SLE, OPCODE_SGT },
1490 { OPCODE_SGT, OPCODE_SLE },
1491 { OPCODE_SGE, OPCODE_SLT },
1492 { OPCODE_SEQ, OPCODE_SNE },
1493 { OPCODE_SNE, OPCODE_SEQ },
1494 { 0, 0 }
1495 };
1496 struct prog_instruction *inst;
1497 slang_ir_storage zero;
1498 GLuint i;
1499
1500 /* child expr */
1501 inst = emit(emitInfo, n->Children[0]);
1502
1503 #if PEEPHOLE_OPTIMIZATIONS
1504 if (inst) {
1505 /* if the prev instruction was a comparison instruction, invert it */
1506 for (i = 0; operators[i].op; i++) {
1507 if (inst->Opcode == operators[i].op) {
1508 inst->Opcode = operators[i].opNot;
1509 n->Store = n->Children[0]->Store;
1510 return inst;
1511 }
1512 }
1513 }
1514 #endif
1515
1516 /* else, invert using SEQ (v = v == 0) */
1517 if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size))
1518 return NULL;
1519
1520 constant_to_storage(emitInfo, 0.0, &zero);
1521 inst = emit_instruction(emitInfo,
1522 OPCODE_SEQ,
1523 n->Store,
1524 n->Children[0]->Store,
1525 &zero,
1526 NULL);
1527 inst_comment(inst, "NOT");
1528
1529 free_node_storage(emitInfo->vt, n->Children[0]);
1530
1531 return inst;
1532 }
1533
1534
1535 static struct prog_instruction *
1536 emit_if(slang_emit_info *emitInfo, slang_ir_node *n)
1537 {
1538 struct gl_program *prog = emitInfo->prog;
1539 GLuint ifInstLoc, elseInstLoc = 0;
1540 GLuint condWritemask = 0;
1541
1542 /* emit condition expression code */
1543 {
1544 struct prog_instruction *inst;
1545 inst = emit(emitInfo, n->Children[0]);
1546 if (emitInfo->EmitCondCodes) {
1547 if (!inst) {
1548 /* error recovery */
1549 return NULL;
1550 }
1551 condWritemask = inst->DstReg.WriteMask;
1552 }
1553 }
1554
1555 if (!n->Children[0]->Store)
1556 return NULL;
1557
1558 #if 0
1559 assert(n->Children[0]->Store->Size == 1); /* a bool! */
1560 #endif
1561
1562 ifInstLoc = prog->NumInstructions;
1563 if (emitInfo->EmitHighLevelInstructions) {
1564 if (emitInfo->EmitCondCodes) {
1565 /* IF condcode THEN ... */
1566 struct prog_instruction *ifInst;
1567 ifInst = new_instruction(emitInfo, OPCODE_IF);
1568 ifInst->DstReg.CondMask = COND_NE; /* if cond is non-zero */
1569 /* only test the cond code (1 of 4) that was updated by the
1570 * previous instruction.
1571 */
1572 ifInst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1573 }
1574 else {
1575 /* IF src[0] THEN ... */
1576 emit_instruction(emitInfo, OPCODE_IF,
1577 NULL, /* dst */
1578 n->Children[0]->Store, /* op0 */
1579 NULL,
1580 NULL);
1581 }
1582 }
1583 else {
1584 /* conditional jump to else, or endif */
1585 struct prog_instruction *ifInst = new_instruction(emitInfo, OPCODE_BRA);
1586 ifInst->DstReg.CondMask = COND_EQ; /* BRA if cond is zero */
1587 inst_comment(ifInst, "if zero");
1588 ifInst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1589 }
1590
1591 /* if body */
1592 emit(emitInfo, n->Children[1]);
1593
1594 if (n->Children[2]) {
1595 /* have else body */
1596 elseInstLoc = prog->NumInstructions;
1597 if (emitInfo->EmitHighLevelInstructions) {
1598 (void) new_instruction(emitInfo, OPCODE_ELSE);
1599 }
1600 else {
1601 /* jump to endif instruction */
1602 struct prog_instruction *inst;
1603 inst = new_instruction(emitInfo, OPCODE_BRA);
1604 inst_comment(inst, "else");
1605 inst->DstReg.CondMask = COND_TR; /* always branch */
1606 }
1607 prog->Instructions[ifInstLoc].BranchTarget = prog->NumInstructions;
1608 emit(emitInfo, n->Children[2]);
1609 }
1610 else {
1611 /* no else body */
1612 prog->Instructions[ifInstLoc].BranchTarget = prog->NumInstructions;
1613 }
1614
1615 if (emitInfo->EmitHighLevelInstructions) {
1616 (void) new_instruction(emitInfo, OPCODE_ENDIF);
1617 }
1618
1619 if (n->Children[2]) {
1620 prog->Instructions[elseInstLoc].BranchTarget = prog->NumInstructions;
1621 }
1622 return NULL;
1623 }
1624
1625
1626 static struct prog_instruction *
1627 emit_loop(slang_emit_info *emitInfo, slang_ir_node *n)
1628 {
1629 struct gl_program *prog = emitInfo->prog;
1630 struct prog_instruction *endInst;
1631 GLuint beginInstLoc, tailInstLoc, endInstLoc;
1632 slang_ir_node *ir;
1633
1634 /* emit OPCODE_BGNLOOP */
1635 beginInstLoc = prog->NumInstructions;
1636 if (emitInfo->EmitHighLevelInstructions) {
1637 (void) new_instruction(emitInfo, OPCODE_BGNLOOP);
1638 }
1639
1640 /* body */
1641 emit(emitInfo, n->Children[0]);
1642
1643 /* tail */
1644 tailInstLoc = prog->NumInstructions;
1645 if (n->Children[1]) {
1646 if (emitInfo->EmitComments)
1647 emit_comment(emitInfo, "Loop tail code:");
1648 emit(emitInfo, n->Children[1]);
1649 }
1650
1651 endInstLoc = prog->NumInstructions;
1652 if (emitInfo->EmitHighLevelInstructions) {
1653 /* emit OPCODE_ENDLOOP */
1654 endInst = new_instruction(emitInfo, OPCODE_ENDLOOP);
1655 }
1656 else {
1657 /* emit unconditional BRA-nch */
1658 endInst = new_instruction(emitInfo, OPCODE_BRA);
1659 endInst->DstReg.CondMask = COND_TR; /* always true */
1660 }
1661 /* ENDLOOP's BranchTarget points to the BGNLOOP inst */
1662 endInst->BranchTarget = beginInstLoc;
1663
1664 if (emitInfo->EmitHighLevelInstructions) {
1665 /* BGNLOOP's BranchTarget points to the ENDLOOP inst */
1666 prog->Instructions[beginInstLoc].BranchTarget = prog->NumInstructions -1;
1667 }
1668
1669 /* Done emitting loop code. Now walk over the loop's linked list of
1670 * BREAK and CONT nodes, filling in their BranchTarget fields (which
1671 * will point to the ENDLOOP+1 or BGNLOOP instructions, respectively).
1672 */
1673 for (ir = n->List; ir; ir = ir->List) {
1674 struct prog_instruction *inst = prog->Instructions + ir->InstLocation;
1675 assert(inst->BranchTarget < 0);
1676 if (ir->Opcode == IR_BREAK ||
1677 ir->Opcode == IR_BREAK_IF_TRUE) {
1678 assert(inst->Opcode == OPCODE_BRK ||
1679 inst->Opcode == OPCODE_BRA);
1680 /* go to instruction after end of loop */
1681 inst->BranchTarget = endInstLoc + 1;
1682 }
1683 else {
1684 assert(ir->Opcode == IR_CONT ||
1685 ir->Opcode == IR_CONT_IF_TRUE);
1686 assert(inst->Opcode == OPCODE_CONT ||
1687 inst->Opcode == OPCODE_BRA);
1688 /* go to instruction at tail of loop */
1689 inst->BranchTarget = endInstLoc;
1690 }
1691 }
1692 return NULL;
1693 }
1694
1695
1696 /**
1697 * Unconditional "continue" or "break" statement.
1698 * Either OPCODE_CONT, OPCODE_BRK or OPCODE_BRA will be emitted.
1699 */
1700 static struct prog_instruction *
1701 emit_cont_break(slang_emit_info *emitInfo, slang_ir_node *n)
1702 {
1703 gl_inst_opcode opcode;
1704 struct prog_instruction *inst;
1705
1706 if (n->Opcode == IR_CONT) {
1707 /* we need to execute the loop's tail code before doing CONT */
1708 assert(n->Parent);
1709 assert(n->Parent->Opcode == IR_LOOP);
1710 if (n->Parent->Children[1]) {
1711 /* emit tail code */
1712 if (emitInfo->EmitComments) {
1713 emit_comment(emitInfo, "continue - tail code:");
1714 }
1715 emit(emitInfo, n->Parent->Children[1]);
1716 }
1717 }
1718
1719 /* opcode selection */
1720 if (emitInfo->EmitHighLevelInstructions) {
1721 opcode = (n->Opcode == IR_CONT) ? OPCODE_CONT : OPCODE_BRK;
1722 }
1723 else {
1724 opcode = OPCODE_BRA;
1725 }
1726 n->InstLocation = emitInfo->prog->NumInstructions;
1727 inst = new_instruction(emitInfo, opcode);
1728 inst->DstReg.CondMask = COND_TR; /* always true */
1729 return inst;
1730 }
1731
1732
1733 /**
1734 * Conditional "continue" or "break" statement.
1735 * Either OPCODE_CONT, OPCODE_BRK or OPCODE_BRA will be emitted.
1736 */
1737 static struct prog_instruction *
1738 emit_cont_break_if_true(slang_emit_info *emitInfo, slang_ir_node *n)
1739 {
1740 struct prog_instruction *inst;
1741
1742 assert(n->Opcode == IR_CONT_IF_TRUE ||
1743 n->Opcode == IR_BREAK_IF_TRUE);
1744
1745 /* evaluate condition expr, setting cond codes */
1746 inst = emit(emitInfo, n->Children[0]);
1747 if (emitInfo->EmitCondCodes) {
1748 assert(inst);
1749 inst->CondUpdate = GL_TRUE;
1750 }
1751
1752 n->InstLocation = emitInfo->prog->NumInstructions;
1753
1754 /* opcode selection */
1755 if (emitInfo->EmitHighLevelInstructions) {
1756 const gl_inst_opcode opcode
1757 = (n->Opcode == IR_CONT_IF_TRUE) ? OPCODE_CONT : OPCODE_BRK;
1758 if (emitInfo->EmitCondCodes) {
1759 /* Get the writemask from the previous instruction which set
1760 * the condcodes. Use that writemask as the CondSwizzle.
1761 */
1762 const GLuint condWritemask = inst->DstReg.WriteMask;
1763 inst = new_instruction(emitInfo, opcode);
1764 inst->DstReg.CondMask = COND_NE;
1765 inst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1766 return inst;
1767 }
1768 else {
1769 /* IF reg
1770 * BRK/CONT;
1771 * ENDIF
1772 */
1773 GLint ifInstLoc;
1774 ifInstLoc = emitInfo->prog->NumInstructions;
1775 inst = emit_instruction(emitInfo, OPCODE_IF,
1776 NULL, /* dest */
1777 n->Children[0]->Store,
1778 NULL,
1779 NULL);
1780 n->InstLocation = emitInfo->prog->NumInstructions;
1781
1782 inst = new_instruction(emitInfo, opcode);
1783 inst = new_instruction(emitInfo, OPCODE_ENDIF);
1784
1785 emitInfo->prog->Instructions[ifInstLoc].BranchTarget
1786 = emitInfo->prog->NumInstructions;
1787 return inst;
1788 }
1789 }
1790 else {
1791 const GLuint condWritemask = inst->DstReg.WriteMask;
1792 assert(emitInfo->EmitCondCodes);
1793 inst = new_instruction(emitInfo, OPCODE_BRA);
1794 inst->DstReg.CondMask = COND_NE;
1795 inst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1796 return inst;
1797 }
1798 }
1799
1800
1801 static struct prog_instruction *
1802 emit_swizzle(slang_emit_info *emitInfo, slang_ir_node *n)
1803 {
1804 struct prog_instruction *inst;
1805
1806 inst = emit(emitInfo, n->Children[0]);
1807
1808 #if 0
1809 assert(n->Store->Parent);
1810 /* Apply this node's swizzle to parent's storage */
1811 GLuint swizzle = n->Store->Swizzle;
1812 _slang_copy_ir_storage(n->Store, n->Store->Parent);
1813 n->Store->Swizzle = _slang_swizzle_swizzle(n->Store->Swizzle, swizzle);
1814 assert(!n->Store->Parent);
1815 #endif
1816 return inst;
1817 }
1818
1819
1820 /**
1821 * Dereference array element: element == array[index]
1822 * This basically involves emitting code for computing the array index
1823 * and updating the node/element's storage info.
1824 */
1825 static struct prog_instruction *
1826 emit_array_element(slang_emit_info *emitInfo, slang_ir_node *n)
1827 {
1828 slang_ir_storage *arrayStore, *indexStore;
1829 const int elemSize = n->Store->Size; /* number of floats */
1830 const GLint elemSizeVec = (elemSize + 3) / 4; /* number of vec4 */
1831 struct prog_instruction *inst;
1832
1833 assert(n->Opcode == IR_ELEMENT);
1834 assert(elemSize > 0);
1835
1836 /* special case for built-in state variables, like light state */
1837 {
1838 slang_ir_storage *root = n->Store;
1839 assert(!root->Parent);
1840 while (root->Parent)
1841 root = root->Parent;
1842
1843 if (root->File == PROGRAM_STATE_VAR) {
1844 GLint index = _slang_alloc_statevar(n, emitInfo->prog->Parameters);
1845 assert(n->Store->Index == index);
1846 return NULL;
1847 }
1848 }
1849
1850 /* do codegen for array itself */
1851 emit(emitInfo, n->Children[0]);
1852 arrayStore = n->Children[0]->Store;
1853
1854 /* The initial array element storage is the array's storage,
1855 * then modified below.
1856 */
1857 _slang_copy_ir_storage(n->Store, arrayStore);
1858
1859
1860 if (n->Children[1]->Opcode == IR_FLOAT) {
1861 /* Constant array index */
1862 const GLint element = (GLint) n->Children[1]->Value[0];
1863
1864 /* this element's storage is the array's storage, plus constant offset */
1865 n->Store->Index += elemSizeVec * element;
1866 }
1867 else {
1868 /* Variable array index */
1869
1870 /* do codegen for array index expression */
1871 emit(emitInfo, n->Children[1]);
1872 indexStore = n->Children[1]->Store;
1873
1874 if (indexStore->IsIndirect) {
1875 /* need to put the array index into a temporary since we can't
1876 * directly support a[b[i]] constructs.
1877 */
1878
1879
1880 /*indexStore = tempstore();*/
1881 }
1882
1883
1884 if (elemSize > 4) {
1885 /* need to multiply array index by array element size */
1886 struct prog_instruction *inst;
1887 slang_ir_storage *indexTemp;
1888 slang_ir_storage elemSizeStore;
1889
1890 /* allocate 1 float indexTemp */
1891 indexTemp = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, 1);
1892 _slang_alloc_temp(emitInfo->vt, indexTemp);
1893
1894 /* allocate a constant containing the element size */
1895 constant_to_storage(emitInfo, (float) elemSizeVec, &elemSizeStore);
1896
1897 /* multiply array index by element size */
1898 inst = emit_instruction(emitInfo,
1899 OPCODE_MUL,
1900 indexTemp, /* dest */
1901 indexStore, /* the index */
1902 &elemSizeStore,
1903 NULL);
1904
1905 indexStore = indexTemp;
1906 }
1907
1908 if (arrayStore->IsIndirect) {
1909 /* ex: in a[i][j], a[i] (the arrayStore) is indirect */
1910 /* Need to add indexStore to arrayStore->Indirect store */
1911 slang_ir_storage indirectArray;
1912 slang_ir_storage *indexTemp;
1913
1914 _slang_init_ir_storage(&indirectArray,
1915 arrayStore->IndirectFile,
1916 arrayStore->IndirectIndex,
1917 1,
1918 arrayStore->IndirectSwizzle);
1919
1920 /* allocate 1 float indexTemp */
1921 indexTemp = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, 1);
1922 _slang_alloc_temp(emitInfo->vt, indexTemp);
1923
1924 inst = emit_instruction(emitInfo,
1925 OPCODE_ADD,
1926 indexTemp, /* dest */
1927 indexStore, /* the index */
1928 &indirectArray, /* indirect array base */
1929 NULL);
1930
1931 indexStore = indexTemp;
1932 }
1933
1934 /* update the array element storage info */
1935 n->Store->IsIndirect = GL_TRUE;
1936 n->Store->IndirectFile = indexStore->File;
1937 n->Store->IndirectIndex = indexStore->Index;
1938 n->Store->IndirectSwizzle = indexStore->Swizzle;
1939 }
1940
1941 n->Store->Size = elemSize;
1942 n->Store->Swizzle = _slang_var_swizzle(elemSize, 0);
1943
1944 return NULL; /* no instruction */
1945 }
1946
1947
1948 /**
1949 * Resolve storage for accessing a structure field.
1950 */
1951 static struct prog_instruction *
1952 emit_struct_field(slang_emit_info *emitInfo, slang_ir_node *n)
1953 {
1954 slang_ir_storage *root = n->Store;
1955 GLint fieldOffset, fieldSize;
1956
1957 assert(n->Opcode == IR_FIELD);
1958
1959 assert(!root->Parent);
1960 while (root->Parent)
1961 root = root->Parent;
1962
1963 /* If this is the field of a state var, allocate constant/uniform
1964 * storage for it now if we haven't already.
1965 * Note that we allocate storage (uniform/constant slots) for state
1966 * variables here rather than at declaration time so we only allocate
1967 * space for the ones that we actually use!
1968 */
1969 if (root->File == PROGRAM_STATE_VAR) {
1970 root->Index = _slang_alloc_statevar(n, emitInfo->prog->Parameters);
1971 if (root->Index < 0) {
1972 slang_info_log_error(emitInfo->log, "Error parsing state variable");
1973 return NULL;
1974 }
1975 return NULL;
1976 }
1977 else {
1978 /* do codegen for struct */
1979 emit(emitInfo, n->Children[0]);
1980 assert(n->Children[0]->Store->Index >= 0);
1981 }
1982
1983 fieldOffset = n->Store->Index;
1984 fieldSize = n->Store->Size;
1985
1986 _slang_copy_ir_storage(n->Store, n->Children[0]->Store);
1987
1988 n->Store->Index = n->Children[0]->Store->Index + fieldOffset / 4;
1989 /* XXX test this:
1990 n->Store->Index += fieldOffset / 4;
1991 */
1992
1993 switch (fieldSize) {
1994 case 1:
1995 {
1996 GLint swz = fieldOffset % 4;
1997 n->Store->Swizzle = MAKE_SWIZZLE4(swz, swz, swz, swz);
1998 }
1999 break;
2000 case 2:
2001 n->Store->Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
2002 SWIZZLE_NIL, SWIZZLE_NIL);
2003 break;
2004 case 3:
2005 n->Store->Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
2006 SWIZZLE_Z, SWIZZLE_NIL);
2007 break;
2008 default:
2009 n->Store->Swizzle = SWIZZLE_XYZW;
2010 }
2011
2012 assert(n->Store->Index >= 0);
2013
2014 return NULL; /* no instruction */
2015 }
2016
2017
2018 /**
2019 * Emit code for a variable declaration.
2020 * This usually doesn't result in any code generation, but just
2021 * memory allocation.
2022 */
2023 static struct prog_instruction *
2024 emit_var_decl(slang_emit_info *emitInfo, slang_ir_node *n)
2025 {
2026 assert(n->Store);
2027 assert(n->Store->File != PROGRAM_UNDEFINED);
2028 assert(n->Store->Size > 0);
2029 /*assert(n->Store->Index < 0);*/
2030
2031 if (!n->Var || n->Var->isTemp) {
2032 /* a nameless/temporary variable, will be freed after first use */
2033 /*NEW*/
2034 if (n->Store->Index < 0 && !_slang_alloc_temp(emitInfo->vt, n->Store)) {
2035 slang_info_log_error(emitInfo->log,
2036 "Ran out of registers, too many temporaries");
2037 return NULL;
2038 }
2039 }
2040 else {
2041 /* a regular variable */
2042 _slang_add_variable(emitInfo->vt, n->Var);
2043 if (!_slang_alloc_var(emitInfo->vt, n->Store)) {
2044 slang_info_log_error(emitInfo->log,
2045 "Ran out of registers, too many variables");
2046 return NULL;
2047 }
2048 /*
2049 printf("IR_VAR_DECL %s %d store %p\n",
2050 (char*) n->Var->a_name, n->Store->Index, (void*) n->Store);
2051 */
2052 assert(n->Var->store == n->Store);
2053 }
2054 if (emitInfo->EmitComments) {
2055 /* emit NOP with comment describing the variable's storage location */
2056 char s[1000];
2057 sprintf(s, "TEMP[%d]%s = variable %s (size %d)",
2058 n->Store->Index,
2059 _mesa_swizzle_string(n->Store->Swizzle, 0, GL_FALSE),
2060 (n->Var ? (char *) n->Var->a_name : "anonymous"),
2061 n->Store->Size);
2062 emit_comment(emitInfo, s);
2063 }
2064 return NULL;
2065 }
2066
2067
2068 /**
2069 * Emit code for a reference to a variable.
2070 * Actually, no code is generated but we may do some memory allocation.
2071 * In particular, state vars (uniforms) are allocated on an as-needed basis.
2072 */
2073 static struct prog_instruction *
2074 emit_var_ref(slang_emit_info *emitInfo, slang_ir_node *n)
2075 {
2076 assert(n->Store);
2077 assert(n->Store->File != PROGRAM_UNDEFINED);
2078
2079 if (n->Store->File == PROGRAM_STATE_VAR && n->Store->Index < 0) {
2080 n->Store->Index = _slang_alloc_statevar(n, emitInfo->prog->Parameters);
2081 }
2082 else if (n->Store->File == PROGRAM_UNIFORM) {
2083 /* mark var as used */
2084 _mesa_use_uniform(emitInfo->prog->Parameters, (char *) n->Var->a_name);
2085 }
2086
2087 if (n->Store->Index < 0) {
2088 /* probably ran out of registers */
2089 return NULL;
2090 }
2091 assert(n->Store->Size > 0);
2092
2093 return NULL;
2094 }
2095
2096
2097 static struct prog_instruction *
2098 emit(slang_emit_info *emitInfo, slang_ir_node *n)
2099 {
2100 struct prog_instruction *inst;
2101 if (!n)
2102 return NULL;
2103
2104 if (emitInfo->log->error_flag) {
2105 return NULL;
2106 }
2107
2108 switch (n->Opcode) {
2109 case IR_SEQ:
2110 /* sequence of two sub-trees */
2111 assert(n->Children[0]);
2112 assert(n->Children[1]);
2113 emit(emitInfo, n->Children[0]);
2114 if (emitInfo->log->error_flag)
2115 return NULL;
2116 inst = emit(emitInfo, n->Children[1]);
2117 #if 0
2118 assert(!n->Store);
2119 #endif
2120 n->Store = n->Children[1]->Store;
2121 return inst;
2122
2123 case IR_SCOPE:
2124 /* new variable scope */
2125 _slang_push_var_table(emitInfo->vt);
2126 inst = emit(emitInfo, n->Children[0]);
2127 _slang_pop_var_table(emitInfo->vt);
2128 return inst;
2129
2130 case IR_VAR_DECL:
2131 /* Variable declaration - allocate a register for it */
2132 inst = emit_var_decl(emitInfo, n);
2133 return inst;
2134
2135 case IR_VAR:
2136 /* Reference to a variable
2137 * Storage should have already been resolved/allocated.
2138 */
2139 return emit_var_ref(emitInfo, n);
2140
2141 case IR_ELEMENT:
2142 return emit_array_element(emitInfo, n);
2143 case IR_FIELD:
2144 return emit_struct_field(emitInfo, n);
2145 case IR_SWIZZLE:
2146 return emit_swizzle(emitInfo, n);
2147
2148 /* Simple arithmetic */
2149 /* unary */
2150 case IR_MOVE:
2151 case IR_RSQ:
2152 case IR_RCP:
2153 case IR_FLOOR:
2154 case IR_FRAC:
2155 case IR_F_TO_I:
2156 case IR_I_TO_F:
2157 case IR_ABS:
2158 case IR_SIN:
2159 case IR_COS:
2160 case IR_DDX:
2161 case IR_DDY:
2162 case IR_EXP:
2163 case IR_EXP2:
2164 case IR_LOG2:
2165 case IR_NOISE1:
2166 case IR_NOISE2:
2167 case IR_NOISE3:
2168 case IR_NOISE4:
2169 case IR_NRM4:
2170 case IR_NRM3:
2171 /* binary */
2172 case IR_ADD:
2173 case IR_SUB:
2174 case IR_MUL:
2175 case IR_DOT4:
2176 case IR_DOT3:
2177 case IR_DOT2:
2178 case IR_CROSS:
2179 case IR_MIN:
2180 case IR_MAX:
2181 case IR_SEQUAL:
2182 case IR_SNEQUAL:
2183 case IR_SGE:
2184 case IR_SGT:
2185 case IR_SLE:
2186 case IR_SLT:
2187 case IR_POW:
2188 /* trinary operators */
2189 case IR_LRP:
2190 return emit_arith(emitInfo, n);
2191
2192 case IR_EQUAL:
2193 case IR_NOTEQUAL:
2194 return emit_compare(emitInfo, n);
2195
2196 case IR_CLAMP:
2197 return emit_clamp(emitInfo, n);
2198 case IR_TEX:
2199 case IR_TEXB:
2200 case IR_TEXP:
2201 return emit_tex(emitInfo, n);
2202 case IR_NEG:
2203 return emit_negation(emitInfo, n);
2204 case IR_FLOAT:
2205 /* find storage location for this float constant */
2206 n->Store->Index = _mesa_add_unnamed_constant(emitInfo->prog->Parameters,
2207 n->Value,
2208 n->Store->Size,
2209 &n->Store->Swizzle);
2210 if (n->Store->Index < 0) {
2211 slang_info_log_error(emitInfo->log, "Ran out of space for constants");
2212 return NULL;
2213 }
2214 return NULL;
2215
2216 case IR_COPY:
2217 return emit_copy(emitInfo, n);
2218
2219 case IR_COND:
2220 return emit_cond(emitInfo, n);
2221
2222 case IR_NOT:
2223 return emit_not(emitInfo, n);
2224
2225 case IR_LABEL:
2226 return emit_label(emitInfo, n);
2227
2228 case IR_KILL:
2229 return emit_kill(emitInfo);
2230
2231 case IR_CALL:
2232 /* new variable scope for subroutines/function calls */
2233 _slang_push_var_table(emitInfo->vt);
2234 inst = emit_fcall(emitInfo, n);
2235 _slang_pop_var_table(emitInfo->vt);
2236 return inst;
2237
2238 case IR_IF:
2239 return emit_if(emitInfo, n);
2240
2241 case IR_LOOP:
2242 return emit_loop(emitInfo, n);
2243 case IR_BREAK_IF_TRUE:
2244 case IR_CONT_IF_TRUE:
2245 return emit_cont_break_if_true(emitInfo, n);
2246 case IR_BREAK:
2247 /* fall-through */
2248 case IR_CONT:
2249 return emit_cont_break(emitInfo, n);
2250
2251 case IR_BEGIN_SUB:
2252 return new_instruction(emitInfo, OPCODE_BGNSUB);
2253 case IR_END_SUB:
2254 return new_instruction(emitInfo, OPCODE_ENDSUB);
2255 case IR_RETURN:
2256 return emit_return(emitInfo, n);
2257
2258 case IR_NOP:
2259 return NULL;
2260
2261 default:
2262 _mesa_problem(NULL, "Unexpected IR opcode in emit()\n");
2263 }
2264 return NULL;
2265 }
2266
2267
2268 /**
2269 * After code generation, any subroutines will be in separate program
2270 * objects. This function appends all the subroutines onto the main
2271 * program and resolves the linking of all the branch/call instructions.
2272 * XXX this logic should really be part of the linking process...
2273 */
2274 static void
2275 _slang_resolve_subroutines(slang_emit_info *emitInfo)
2276 {
2277 GET_CURRENT_CONTEXT(ctx);
2278 struct gl_program *mainP = emitInfo->prog;
2279 GLuint *subroutineLoc, i, total;
2280
2281 subroutineLoc
2282 = (GLuint *) _mesa_malloc(emitInfo->NumSubroutines * sizeof(GLuint));
2283
2284 /* total number of instructions */
2285 total = mainP->NumInstructions;
2286 for (i = 0; i < emitInfo->NumSubroutines; i++) {
2287 subroutineLoc[i] = total;
2288 total += emitInfo->Subroutines[i]->NumInstructions;
2289 }
2290
2291 /* adjust BranchTargets within the functions */
2292 for (i = 0; i < emitInfo->NumSubroutines; i++) {
2293 struct gl_program *sub = emitInfo->Subroutines[i];
2294 GLuint j;
2295 for (j = 0; j < sub->NumInstructions; j++) {
2296 struct prog_instruction *inst = sub->Instructions + j;
2297 if (inst->Opcode != OPCODE_CAL && inst->BranchTarget >= 0) {
2298 inst->BranchTarget += subroutineLoc[i];
2299 }
2300 }
2301 }
2302
2303 /* append subroutines' instructions after main's instructions */
2304 mainP->Instructions = _mesa_realloc_instructions(mainP->Instructions,
2305 mainP->NumInstructions,
2306 total);
2307 mainP->NumInstructions = total;
2308 for (i = 0; i < emitInfo->NumSubroutines; i++) {
2309 struct gl_program *sub = emitInfo->Subroutines[i];
2310 _mesa_copy_instructions(mainP->Instructions + subroutineLoc[i],
2311 sub->Instructions,
2312 sub->NumInstructions);
2313 /* delete subroutine code */
2314 sub->Parameters = NULL; /* prevent double-free */
2315 _mesa_reference_program(ctx, &emitInfo->Subroutines[i], NULL);
2316 }
2317
2318 /* free subroutine list */
2319 if (emitInfo->Subroutines) {
2320 _mesa_free(emitInfo->Subroutines);
2321 emitInfo->Subroutines = NULL;
2322 }
2323 emitInfo->NumSubroutines = 0;
2324
2325 /* Examine CAL instructions.
2326 * At this point, the BranchTarget field of the CAL instruction is
2327 * the number/id of the subroutine to call (an index into the
2328 * emitInfo->Subroutines list).
2329 * Translate that into an actual instruction location now.
2330 */
2331 for (i = 0; i < mainP->NumInstructions; i++) {
2332 struct prog_instruction *inst = mainP->Instructions + i;
2333 if (inst->Opcode == OPCODE_CAL) {
2334 const GLuint f = inst->BranchTarget;
2335 inst->BranchTarget = subroutineLoc[f];
2336 }
2337 }
2338
2339 _mesa_free(subroutineLoc);
2340 }
2341
2342
2343
2344
2345 GLboolean
2346 _slang_emit_code(slang_ir_node *n, slang_var_table *vt,
2347 struct gl_program *prog, GLboolean withEnd,
2348 slang_info_log *log)
2349 {
2350 GET_CURRENT_CONTEXT(ctx);
2351 GLboolean success;
2352 slang_emit_info emitInfo;
2353 GLuint maxUniforms;
2354
2355 emitInfo.log = log;
2356 emitInfo.vt = vt;
2357 emitInfo.prog = prog;
2358 emitInfo.Subroutines = NULL;
2359 emitInfo.NumSubroutines = 0;
2360 emitInfo.MaxInstructions = prog->NumInstructions;
2361
2362 emitInfo.EmitHighLevelInstructions = ctx->Shader.EmitHighLevelInstructions;
2363 emitInfo.EmitCondCodes = ctx->Shader.EmitCondCodes;
2364 emitInfo.EmitComments = ctx->Shader.EmitComments;
2365 emitInfo.EmitBeginEndSub = GL_TRUE;
2366
2367 if (!emitInfo.EmitCondCodes) {
2368 emitInfo.EmitHighLevelInstructions = GL_TRUE;
2369 }
2370
2371 /* Check uniform/constant limits */
2372 if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
2373 maxUniforms = ctx->Const.FragmentProgram.MaxUniformComponents / 4;
2374 }
2375 else {
2376 assert(prog->Target == GL_VERTEX_PROGRAM_ARB);
2377 maxUniforms = ctx->Const.VertexProgram.MaxUniformComponents / 4;
2378 }
2379 if (prog->Parameters->NumParameters > maxUniforms) {
2380 slang_info_log_error(log, "Constant/uniform register limit exceeded");
2381 return GL_FALSE;
2382 }
2383
2384 (void) emit(&emitInfo, n);
2385
2386 /* finish up by adding the END opcode to program */
2387 if (withEnd) {
2388 struct prog_instruction *inst;
2389 inst = new_instruction(&emitInfo, OPCODE_END);
2390 }
2391
2392 _slang_resolve_subroutines(&emitInfo);
2393
2394 success = GL_TRUE;
2395
2396 #if 0
2397 printf("*********** End emit code (%u inst):\n", prog->NumInstructions);
2398 _mesa_print_program(prog);
2399 _mesa_print_program_parameters(ctx,prog);
2400 #endif
2401
2402 return success;
2403 }