Merge branch 'width0'
[mesa.git] / src / mesa / shader / slang / slang_emit.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2005-2008 Brian Paul All Rights Reserved.
5 * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file slang_emit.c
27 * Emit program instructions (PI code) from IR trees.
28 * \author Brian Paul
29 */
30
31 /***
32 *** NOTES
33 ***
34 *** To emit GPU instructions, we basically just do an in-order traversal
35 *** of the IR tree.
36 ***/
37
38
39 #include "main/imports.h"
40 #include "main/context.h"
41 #include "main/macros.h"
42 #include "shader/program.h"
43 #include "shader/prog_instruction.h"
44 #include "shader/prog_parameter.h"
45 #include "shader/prog_print.h"
46 #include "slang_builtin.h"
47 #include "slang_emit.h"
48 #include "slang_mem.h"
49
50
51 #define PEEPHOLE_OPTIMIZATIONS 1
52 #define ANNOTATE 0
53
54
55 typedef struct
56 {
57 slang_info_log *log;
58 slang_var_table *vt;
59 struct gl_program *prog;
60 struct gl_program **Subroutines;
61 GLuint NumSubroutines;
62
63 GLuint MaxInstructions; /**< size of prog->Instructions[] buffer */
64
65 GLboolean UnresolvedFunctions;
66
67 /* code-gen options */
68 GLboolean EmitHighLevelInstructions;
69 GLboolean EmitCondCodes;
70 GLboolean EmitComments;
71 GLboolean EmitBeginEndSub; /* XXX TEMPORARY */
72 } slang_emit_info;
73
74
75
76 static struct gl_program *
77 new_subroutine(slang_emit_info *emitInfo, GLuint *id)
78 {
79 GET_CURRENT_CONTEXT(ctx);
80 const GLuint n = emitInfo->NumSubroutines;
81
82 emitInfo->Subroutines = (struct gl_program **)
83 _mesa_realloc(emitInfo->Subroutines,
84 n * sizeof(struct gl_program *),
85 (n + 1) * sizeof(struct gl_program *));
86 emitInfo->Subroutines[n] = ctx->Driver.NewProgram(ctx, emitInfo->prog->Target, 0);
87 emitInfo->Subroutines[n]->Parameters = emitInfo->prog->Parameters;
88 emitInfo->NumSubroutines++;
89 *id = n;
90 return emitInfo->Subroutines[n];
91 }
92
93
94 /**
95 * Convert a writemask to a swizzle. Used for testing cond codes because
96 * we only want to test the cond code component(s) that was set by the
97 * previous instruction.
98 */
99 static GLuint
100 writemask_to_swizzle(GLuint writemask)
101 {
102 if (writemask == WRITEMASK_X)
103 return SWIZZLE_XXXX;
104 if (writemask == WRITEMASK_Y)
105 return SWIZZLE_YYYY;
106 if (writemask == WRITEMASK_Z)
107 return SWIZZLE_ZZZZ;
108 if (writemask == WRITEMASK_W)
109 return SWIZZLE_WWWW;
110 return SWIZZLE_XYZW; /* shouldn't be hit */
111 }
112
113
114 /**
115 * Convert a swizzle mask to a writemask.
116 * Note that the slang_ir_storage->Swizzle field can represent either a
117 * swizzle mask or a writemask, depending on how it's used. For example,
118 * when we parse "direction.yz" alone, we don't know whether .yz is a
119 * writemask or a swizzle. In this case, we encode ".yz" in store->Swizzle
120 * as a swizzle mask (.yz?? actually). Later, if direction.yz is used as
121 * an R-value, we use store->Swizzle as-is. Otherwise, if direction.yz is
122 * used as an L-value, we convert it to a writemask.
123 */
124 static GLuint
125 swizzle_to_writemask(GLuint swizzle)
126 {
127 GLuint i, writemask = 0x0;
128 for (i = 0; i < 4; i++) {
129 GLuint swz = GET_SWZ(swizzle, i);
130 if (swz <= SWIZZLE_W) {
131 writemask |= (1 << swz);
132 }
133 }
134 return writemask;
135 }
136
137
138 /**
139 * Swizzle a swizzle (function composition).
140 * That is, return swz2(swz1), or said another way: swz1.szw2
141 * Example: swizzle_swizzle(".zwxx", ".xxyw") yields ".zzwx"
142 */
143 GLuint
144 _slang_swizzle_swizzle(GLuint swz1, GLuint swz2)
145 {
146 GLuint i, swz, s[4];
147 for (i = 0; i < 4; i++) {
148 GLuint c = GET_SWZ(swz2, i);
149 if (c <= SWIZZLE_W)
150 s[i] = GET_SWZ(swz1, c);
151 else
152 s[i] = c;
153 }
154 swz = MAKE_SWIZZLE4(s[0], s[1], s[2], s[3]);
155 return swz;
156 }
157
158
159 /**
160 * Return the default swizzle mask for accessing a variable of the
161 * given size (in floats). If size = 1, comp is used to identify
162 * which component [0..3] of the register holds the variable.
163 */
164 GLuint
165 _slang_var_swizzle(GLint size, GLint comp)
166 {
167 switch (size) {
168 case 1:
169 return MAKE_SWIZZLE4(comp, SWIZZLE_NIL, SWIZZLE_NIL, SWIZZLE_NIL);
170 case 2:
171 return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_NIL, SWIZZLE_NIL);
172 case 3:
173 return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_NIL);
174 default:
175 return SWIZZLE_XYZW;
176 }
177 }
178
179
180
181 /**
182 * Allocate storage for the given node (if it hasn't already been allocated).
183 *
184 * Typically this is temporary storage for an intermediate result (such as
185 * for a multiply or add, etc).
186 *
187 * If n->Store does not exist it will be created and will be of the size
188 * specified by defaultSize.
189 */
190 static GLboolean
191 alloc_node_storage(slang_emit_info *emitInfo, slang_ir_node *n,
192 GLint defaultSize)
193 {
194 assert(!n->Var);
195 if (!n->Store) {
196 assert(defaultSize > 0);
197 n->Store = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, defaultSize);
198 if (!n->Store) {
199 return GL_FALSE;
200 }
201 }
202
203 /* now allocate actual register(s). I.e. set n->Store->Index >= 0 */
204 if (n->Store->Index < 0) {
205 if (!_slang_alloc_temp(emitInfo->vt, n->Store)) {
206 slang_info_log_error(emitInfo->log,
207 "Ran out of registers, too many temporaries");
208 _slang_free(n->Store);
209 n->Store = NULL;
210 return GL_FALSE;
211 }
212 }
213 return GL_TRUE;
214 }
215
216
217 /**
218 * Free temporary storage, if n->Store is, in fact, temp storage.
219 * Otherwise, no-op.
220 */
221 static void
222 free_node_storage(slang_var_table *vt, slang_ir_node *n)
223 {
224 if (n->Store->File == PROGRAM_TEMPORARY &&
225 n->Store->Index >= 0 &&
226 n->Opcode != IR_SWIZZLE) {
227 if (_slang_is_temp(vt, n->Store)) {
228 _slang_free_temp(vt, n->Store);
229 n->Store->Index = -1;
230 n->Store = NULL; /* XXX this may not be needed */
231 }
232 }
233 }
234
235
236 /**
237 * Helper function to allocate a short-term temporary.
238 * Free it with _slang_free_temp().
239 */
240 static GLboolean
241 alloc_local_temp(slang_emit_info *emitInfo, slang_ir_storage *temp, GLint size)
242 {
243 assert(size >= 1);
244 assert(size <= 4);
245 _mesa_bzero(temp, sizeof(*temp));
246 temp->Size = size;
247 temp->File = PROGRAM_TEMPORARY;
248 temp->Index = -1;
249 return _slang_alloc_temp(emitInfo->vt, temp);
250 }
251
252
253 /**
254 * Remove any SWIZZLE_NIL terms from given swizzle mask.
255 * For a swizzle like .z??? generate .zzzz (replicate single component).
256 * Else, for .wx?? generate .wxzw (insert default component for the position).
257 */
258 static GLuint
259 fix_swizzle(GLuint swizzle)
260 {
261 GLuint c0 = GET_SWZ(swizzle, 0),
262 c1 = GET_SWZ(swizzle, 1),
263 c2 = GET_SWZ(swizzle, 2),
264 c3 = GET_SWZ(swizzle, 3);
265 if (c1 == SWIZZLE_NIL && c2 == SWIZZLE_NIL && c3 == SWIZZLE_NIL) {
266 /* smear first component across all positions */
267 c1 = c2 = c3 = c0;
268 }
269 else {
270 /* insert default swizzle components */
271 if (c0 == SWIZZLE_NIL)
272 c0 = SWIZZLE_X;
273 if (c1 == SWIZZLE_NIL)
274 c1 = SWIZZLE_Y;
275 if (c2 == SWIZZLE_NIL)
276 c2 = SWIZZLE_Z;
277 if (c3 == SWIZZLE_NIL)
278 c3 = SWIZZLE_W;
279 }
280 return MAKE_SWIZZLE4(c0, c1, c2, c3);
281 }
282
283
284
285 /**
286 * Convert IR storage to an instruction dst register.
287 */
288 static void
289 storage_to_dst_reg(struct prog_dst_register *dst, const slang_ir_storage *st)
290 {
291 const GLboolean relAddr = st->RelAddr;
292 const GLint size = st->Size;
293 GLint index = st->Index;
294 GLuint swizzle = st->Swizzle;
295
296 assert(index >= 0);
297 /* if this is storage relative to some parent storage, walk up the tree */
298 while (st->Parent) {
299 st = st->Parent;
300 assert(st->Index >= 0);
301 index += st->Index;
302 swizzle = _slang_swizzle_swizzle(st->Swizzle, swizzle);
303 }
304
305 assert(st->File != PROGRAM_UNDEFINED);
306 dst->File = st->File;
307
308 assert(index >= 0);
309 dst->Index = index;
310
311 assert(size >= 1);
312 assert(size <= 4);
313
314 if (swizzle != SWIZZLE_XYZW) {
315 dst->WriteMask = swizzle_to_writemask(swizzle);
316 }
317 else {
318 switch (size) {
319 case 1:
320 dst->WriteMask = WRITEMASK_X << GET_SWZ(st->Swizzle, 0);
321 break;
322 case 2:
323 dst->WriteMask = WRITEMASK_XY;
324 break;
325 case 3:
326 dst->WriteMask = WRITEMASK_XYZ;
327 break;
328 case 4:
329 dst->WriteMask = WRITEMASK_XYZW;
330 break;
331 default:
332 ; /* error would have been caught above */
333 }
334 }
335
336 dst->RelAddr = relAddr;
337 }
338
339
340 /**
341 * Convert IR storage to an instruction src register.
342 */
343 static void
344 storage_to_src_reg(struct prog_src_register *src, const slang_ir_storage *st)
345 {
346 const GLboolean relAddr = st->RelAddr;
347 GLint index = st->Index;
348 GLuint swizzle = st->Swizzle;
349
350 /* if this is storage relative to some parent storage, walk up the tree */
351 assert(index >= 0);
352 while (st->Parent) {
353 st = st->Parent;
354 if (st->Index < 0) {
355 /* an error should have been reported already */
356 return;
357 }
358 assert(st->Index >= 0);
359 index += st->Index;
360 swizzle = _slang_swizzle_swizzle(fix_swizzle(st->Swizzle), swizzle);
361 }
362
363 assert(st->File >= 0);
364 #if 1 /* XXX temporary */
365 if (st->File == PROGRAM_UNDEFINED) {
366 slang_ir_storage *st0 = (slang_ir_storage *) st;
367 st0->File = PROGRAM_TEMPORARY;
368 }
369 #endif
370 assert(st->File < PROGRAM_UNDEFINED);
371 src->File = st->File;
372
373 assert(index >= 0);
374 src->Index = index;
375
376 swizzle = fix_swizzle(swizzle);
377 assert(GET_SWZ(swizzle, 0) <= SWIZZLE_W);
378 assert(GET_SWZ(swizzle, 1) <= SWIZZLE_W);
379 assert(GET_SWZ(swizzle, 2) <= SWIZZLE_W);
380 assert(GET_SWZ(swizzle, 3) <= SWIZZLE_W);
381 src->Swizzle = swizzle;
382
383 src->RelAddr = relAddr;
384 }
385
386
387 /*
388 * Setup storage pointing to a scalar constant/literal.
389 */
390 static void
391 constant_to_storage(slang_emit_info *emitInfo,
392 GLfloat val,
393 slang_ir_storage *store)
394 {
395 GLuint swizzle;
396 GLint reg;
397 GLfloat value[4];
398
399 value[0] = val;
400 reg = _mesa_add_unnamed_constant(emitInfo->prog->Parameters,
401 value, 1, &swizzle);
402
403 memset(store, 0, sizeof(*store));
404 store->File = PROGRAM_CONSTANT;
405 store->Index = reg;
406 store->Swizzle = swizzle;
407 }
408
409
410 /**
411 * Add new instruction at end of given program.
412 * \param prog the program to append instruction onto
413 * \param opcode opcode for the new instruction
414 * \return pointer to the new instruction
415 */
416 static struct prog_instruction *
417 new_instruction(slang_emit_info *emitInfo, gl_inst_opcode opcode)
418 {
419 struct gl_program *prog = emitInfo->prog;
420 struct prog_instruction *inst;
421
422 #if 0
423 /* print prev inst */
424 if (prog->NumInstructions > 0) {
425 _mesa_print_instruction(prog->Instructions + prog->NumInstructions - 1);
426 }
427 #endif
428 assert(prog->NumInstructions <= emitInfo->MaxInstructions);
429
430 if (prog->NumInstructions == emitInfo->MaxInstructions) {
431 /* grow the instruction buffer */
432 emitInfo->MaxInstructions += 20;
433 prog->Instructions =
434 _mesa_realloc_instructions(prog->Instructions,
435 prog->NumInstructions,
436 emitInfo->MaxInstructions);
437 if (!prog->Instructions) {
438 return NULL;
439 }
440 }
441
442 inst = prog->Instructions + prog->NumInstructions;
443 prog->NumInstructions++;
444 _mesa_init_instructions(inst, 1);
445 inst->Opcode = opcode;
446 inst->BranchTarget = -1; /* invalid */
447 /*
448 printf("New inst %d: %p %s\n", prog->NumInstructions-1,(void*)inst,
449 _mesa_opcode_string(inst->Opcode));
450 */
451 return inst;
452 }
453
454
455 static struct prog_instruction *
456 emit_arl_load(slang_emit_info *emitInfo,
457 gl_register_file file, GLint index, GLuint swizzle)
458 {
459 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_ARL);
460 if (inst) {
461 inst->SrcReg[0].File = file;
462 inst->SrcReg[0].Index = index;
463 inst->SrcReg[0].Swizzle = fix_swizzle(swizzle);
464 inst->DstReg.File = PROGRAM_ADDRESS;
465 inst->DstReg.Index = 0;
466 inst->DstReg.WriteMask = WRITEMASK_X;
467 }
468 return inst;
469 }
470
471
472 /**
473 * Emit a new instruction with given opcode, operands.
474 * At this point the instruction may have multiple indirect register
475 * loads/stores. We convert those into ARL loads and address-relative
476 * operands. See comments inside.
477 * At some point in the future we could directly emit indirectly addressed
478 * registers in Mesa GPU instructions.
479 */
480 static struct prog_instruction *
481 emit_instruction(slang_emit_info *emitInfo,
482 gl_inst_opcode opcode,
483 const slang_ir_storage *dst,
484 const slang_ir_storage *src0,
485 const slang_ir_storage *src1,
486 const slang_ir_storage *src2)
487 {
488 struct prog_instruction *inst;
489 GLuint numIndirect = 0;
490 const slang_ir_storage *src[3];
491 slang_ir_storage newSrc[3], newDst;
492 GLuint i;
493 GLboolean isTemp[3];
494
495 isTemp[0] = isTemp[1] = isTemp[2] = GL_FALSE;
496
497 src[0] = src0;
498 src[1] = src1;
499 src[2] = src2;
500
501 /* count up how many operands are indirect loads */
502 for (i = 0; i < 3; i++) {
503 if (src[i] && src[i]->IsIndirect)
504 numIndirect++;
505 }
506 if (dst && dst->IsIndirect)
507 numIndirect++;
508
509 /* Take special steps for indirect register loads.
510 * If we had multiple address registers this would be simpler.
511 * For example, this GLSL code:
512 * x[i] = y[j] + z[k];
513 * would translate into something like:
514 * ARL ADDR.x, i;
515 * ARL ADDR.y, j;
516 * ARL ADDR.z, k;
517 * ADD TEMP[ADDR.x+5], TEMP[ADDR.y+9], TEMP[ADDR.z+4];
518 * But since we currently only have one address register we have to do this:
519 * ARL ADDR.x, i;
520 * MOV t1, TEMP[ADDR.x+9];
521 * ARL ADDR.x, j;
522 * MOV t2, TEMP[ADDR.x+4];
523 * ARL ADDR.x, k;
524 * ADD TEMP[ADDR.x+5], t1, t2;
525 * The code here figures this out...
526 */
527 if (numIndirect > 0) {
528 for (i = 0; i < 3; i++) {
529 if (src[i] && src[i]->IsIndirect) {
530 /* load the ARL register with the indirect register */
531 emit_arl_load(emitInfo,
532 src[i]->IndirectFile,
533 src[i]->IndirectIndex,
534 src[i]->IndirectSwizzle);
535
536 if (numIndirect > 1) {
537 /* Need to load src[i] into a temporary register */
538 slang_ir_storage srcRelAddr;
539 alloc_local_temp(emitInfo, &newSrc[i], src[i]->Size);
540 isTemp[i] = GL_TRUE;
541
542 /* set RelAddr flag on src register */
543 srcRelAddr = *src[i];
544 srcRelAddr.RelAddr = GL_TRUE;
545 srcRelAddr.IsIndirect = GL_FALSE; /* not really needed */
546
547 /* MOV newSrc, srcRelAddr; */
548 inst = emit_instruction(emitInfo,
549 OPCODE_MOV,
550 &newSrc[i],
551 &srcRelAddr,
552 NULL,
553 NULL);
554 if (!inst) {
555 return NULL;
556 }
557
558 src[i] = &newSrc[i];
559 }
560 else {
561 /* just rewrite the src[i] storage to be ARL-relative */
562 newSrc[i] = *src[i];
563 newSrc[i].RelAddr = GL_TRUE;
564 newSrc[i].IsIndirect = GL_FALSE; /* not really needed */
565 src[i] = &newSrc[i];
566 }
567 }
568 }
569 }
570
571 /* Take special steps for indirect dest register write */
572 if (dst && dst->IsIndirect) {
573 /* load the ARL register with the indirect register */
574 emit_arl_load(emitInfo,
575 dst->IndirectFile,
576 dst->IndirectIndex,
577 dst->IndirectSwizzle);
578 newDst = *dst;
579 newDst.RelAddr = GL_TRUE;
580 newDst.IsIndirect = GL_FALSE;
581 dst = &newDst;
582 }
583
584 /* OK, emit the instruction and its dst, src regs */
585 inst = new_instruction(emitInfo, opcode);
586 if (!inst)
587 return NULL;
588
589 if (dst)
590 storage_to_dst_reg(&inst->DstReg, dst);
591
592 for (i = 0; i < 3; i++) {
593 if (src[i])
594 storage_to_src_reg(&inst->SrcReg[i], src[i]);
595 }
596
597 /* Free any temp registers that we allocated above */
598 for (i = 0; i < 3; i++) {
599 if (isTemp[i])
600 _slang_free_temp(emitInfo->vt, &newSrc[i]);
601 }
602
603 return inst;
604 }
605
606
607
608 /**
609 * Put a comment on the given instruction.
610 */
611 static void
612 inst_comment(struct prog_instruction *inst, const char *comment)
613 {
614 if (inst)
615 inst->Comment = _mesa_strdup(comment);
616 }
617
618
619
620 /**
621 * Return pointer to last instruction in program.
622 */
623 static struct prog_instruction *
624 prev_instruction(slang_emit_info *emitInfo)
625 {
626 struct gl_program *prog = emitInfo->prog;
627 if (prog->NumInstructions == 0)
628 return NULL;
629 else
630 return prog->Instructions + prog->NumInstructions - 1;
631 }
632
633
634 static struct prog_instruction *
635 emit(slang_emit_info *emitInfo, slang_ir_node *n);
636
637
638 /**
639 * Return an annotation string for given node's storage.
640 */
641 static char *
642 storage_annotation(const slang_ir_node *n, const struct gl_program *prog)
643 {
644 #if ANNOTATE
645 const slang_ir_storage *st = n->Store;
646 static char s[100] = "";
647
648 if (!st)
649 return _mesa_strdup("");
650
651 switch (st->File) {
652 case PROGRAM_CONSTANT:
653 if (st->Index >= 0) {
654 const GLfloat *val = prog->Parameters->ParameterValues[st->Index];
655 if (st->Swizzle == SWIZZLE_NOOP)
656 sprintf(s, "{%g, %g, %g, %g}", val[0], val[1], val[2], val[3]);
657 else {
658 sprintf(s, "%g", val[GET_SWZ(st->Swizzle, 0)]);
659 }
660 }
661 break;
662 case PROGRAM_TEMPORARY:
663 if (n->Var)
664 sprintf(s, "%s", (char *) n->Var->a_name);
665 else
666 sprintf(s, "t[%d]", st->Index);
667 break;
668 case PROGRAM_STATE_VAR:
669 case PROGRAM_UNIFORM:
670 sprintf(s, "%s", prog->Parameters->Parameters[st->Index].Name);
671 break;
672 case PROGRAM_VARYING:
673 sprintf(s, "%s", prog->Varying->Parameters[st->Index].Name);
674 break;
675 case PROGRAM_INPUT:
676 sprintf(s, "input[%d]", st->Index);
677 break;
678 case PROGRAM_OUTPUT:
679 sprintf(s, "output[%d]", st->Index);
680 break;
681 default:
682 s[0] = 0;
683 }
684 return _mesa_strdup(s);
685 #else
686 return NULL;
687 #endif
688 }
689
690
691 /**
692 * Return an annotation string for an instruction.
693 */
694 static char *
695 instruction_annotation(gl_inst_opcode opcode, char *dstAnnot,
696 char *srcAnnot0, char *srcAnnot1, char *srcAnnot2)
697 {
698 #if ANNOTATE
699 const char *operator;
700 char *s;
701 int len = 50;
702
703 if (dstAnnot)
704 len += strlen(dstAnnot);
705 else
706 dstAnnot = _mesa_strdup("");
707
708 if (srcAnnot0)
709 len += strlen(srcAnnot0);
710 else
711 srcAnnot0 = _mesa_strdup("");
712
713 if (srcAnnot1)
714 len += strlen(srcAnnot1);
715 else
716 srcAnnot1 = _mesa_strdup("");
717
718 if (srcAnnot2)
719 len += strlen(srcAnnot2);
720 else
721 srcAnnot2 = _mesa_strdup("");
722
723 switch (opcode) {
724 case OPCODE_ADD:
725 operator = "+";
726 break;
727 case OPCODE_SUB:
728 operator = "-";
729 break;
730 case OPCODE_MUL:
731 operator = "*";
732 break;
733 case OPCODE_DP2:
734 operator = "DP2";
735 break;
736 case OPCODE_DP3:
737 operator = "DP3";
738 break;
739 case OPCODE_DP4:
740 operator = "DP4";
741 break;
742 case OPCODE_XPD:
743 operator = "XPD";
744 break;
745 case OPCODE_RSQ:
746 operator = "RSQ";
747 break;
748 case OPCODE_SGT:
749 operator = ">";
750 break;
751 default:
752 operator = ",";
753 }
754
755 s = (char *) malloc(len);
756 sprintf(s, "%s = %s %s %s %s", dstAnnot,
757 srcAnnot0, operator, srcAnnot1, srcAnnot2);
758 assert(_mesa_strlen(s) < len);
759
760 free(dstAnnot);
761 free(srcAnnot0);
762 free(srcAnnot1);
763 free(srcAnnot2);
764
765 return s;
766 #else
767 return NULL;
768 #endif
769 }
770
771
772 /**
773 * Emit an instruction that's just a comment.
774 */
775 static struct prog_instruction *
776 emit_comment(slang_emit_info *emitInfo, const char *comment)
777 {
778 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_NOP);
779 if (inst) {
780 inst_comment(inst, comment);
781 }
782 return inst;
783 }
784
785
786 /**
787 * Generate code for a simple arithmetic instruction.
788 * Either 1, 2 or 3 operands.
789 */
790 static struct prog_instruction *
791 emit_arith(slang_emit_info *emitInfo, slang_ir_node *n)
792 {
793 const slang_ir_info *info = _slang_ir_info(n->Opcode);
794 struct prog_instruction *inst;
795 GLuint i;
796
797 assert(info);
798 assert(info->InstOpcode != OPCODE_NOP);
799
800 #if PEEPHOLE_OPTIMIZATIONS
801 /* Look for MAD opportunity */
802 if (info->NumParams == 2 &&
803 n->Opcode == IR_ADD && n->Children[0]->Opcode == IR_MUL) {
804 /* found pattern IR_ADD(IR_MUL(A, B), C) */
805 emit(emitInfo, n->Children[0]->Children[0]); /* A */
806 emit(emitInfo, n->Children[0]->Children[1]); /* B */
807 emit(emitInfo, n->Children[1]); /* C */
808 if (!alloc_node_storage(emitInfo, n, -1)) { /* dest */
809 return NULL;
810 }
811
812 inst = emit_instruction(emitInfo,
813 OPCODE_MAD,
814 n->Store,
815 n->Children[0]->Children[0]->Store,
816 n->Children[0]->Children[1]->Store,
817 n->Children[1]->Store);
818
819 free_node_storage(emitInfo->vt, n->Children[0]->Children[0]);
820 free_node_storage(emitInfo->vt, n->Children[0]->Children[1]);
821 free_node_storage(emitInfo->vt, n->Children[1]);
822 return inst;
823 }
824
825 if (info->NumParams == 2 &&
826 n->Opcode == IR_ADD && n->Children[1]->Opcode == IR_MUL) {
827 /* found pattern IR_ADD(A, IR_MUL(B, C)) */
828 emit(emitInfo, n->Children[0]); /* A */
829 emit(emitInfo, n->Children[1]->Children[0]); /* B */
830 emit(emitInfo, n->Children[1]->Children[1]); /* C */
831 if (!alloc_node_storage(emitInfo, n, -1)) { /* dest */
832 return NULL;
833 }
834
835 inst = emit_instruction(emitInfo,
836 OPCODE_MAD,
837 n->Store,
838 n->Children[1]->Children[0]->Store,
839 n->Children[1]->Children[1]->Store,
840 n->Children[0]->Store);
841
842 free_node_storage(emitInfo->vt, n->Children[1]->Children[0]);
843 free_node_storage(emitInfo->vt, n->Children[1]->Children[1]);
844 free_node_storage(emitInfo->vt, n->Children[0]);
845 return inst;
846 }
847 #endif
848
849 /* gen code for children, may involve temp allocation */
850 for (i = 0; i < info->NumParams; i++) {
851 emit(emitInfo, n->Children[i]);
852 if (!n->Children[i] || !n->Children[i]->Store) {
853 /* error recovery */
854 return NULL;
855 }
856 }
857
858 /* result storage */
859 if (!alloc_node_storage(emitInfo, n, -1)) {
860 return NULL;
861 }
862
863 inst = emit_instruction(emitInfo,
864 info->InstOpcode,
865 n->Store, /* dest */
866 (info->NumParams > 0 ? n->Children[0]->Store : NULL),
867 (info->NumParams > 1 ? n->Children[1]->Store : NULL),
868 (info->NumParams > 2 ? n->Children[2]->Store : NULL)
869 );
870
871 /* free temps */
872 for (i = 0; i < info->NumParams; i++)
873 free_node_storage(emitInfo->vt, n->Children[i]);
874
875 return inst;
876 }
877
878
879 /**
880 * Emit code for == and != operators. These could normally be handled
881 * by emit_arith() except we need to be able to handle structure comparisons.
882 */
883 static struct prog_instruction *
884 emit_compare(slang_emit_info *emitInfo, slang_ir_node *n)
885 {
886 struct prog_instruction *inst = NULL;
887 GLint size;
888
889 assert(n->Opcode == IR_EQUAL || n->Opcode == IR_NOTEQUAL);
890
891 /* gen code for children */
892 emit(emitInfo, n->Children[0]);
893 emit(emitInfo, n->Children[1]);
894
895 if (n->Children[0]->Store->Size != n->Children[1]->Store->Size) {
896 /* XXX this error should have been caught in slang_codegen.c */
897 slang_info_log_error(emitInfo->log, "invalid operands to == or !=");
898 n->Store = NULL;
899 return NULL;
900 }
901
902 /* final result is 1 bool */
903 if (!alloc_node_storage(emitInfo, n, 1))
904 return NULL;
905
906 size = n->Children[0]->Store->Size;
907
908 if (size == 1) {
909 gl_inst_opcode opcode = n->Opcode == IR_EQUAL ? OPCODE_SEQ : OPCODE_SNE;
910 inst = emit_instruction(emitInfo,
911 opcode,
912 n->Store, /* dest */
913 n->Children[0]->Store,
914 n->Children[1]->Store,
915 NULL);
916 }
917 else if (size <= 4) {
918 /* compare two vectors.
919 * Unfortunately, there's no instruction to compare vectors and
920 * return a scalar result. Do it with some compare and dot product
921 * instructions...
922 */
923 GLuint swizzle;
924 gl_inst_opcode dotOp;
925 slang_ir_storage tempStore;
926
927 if (!alloc_local_temp(emitInfo, &tempStore, 4)) {
928 n->Store = NULL;
929 return NULL;
930 /* out of temps */
931 }
932
933 if (size == 4) {
934 dotOp = OPCODE_DP4;
935 swizzle = SWIZZLE_XYZW;
936 }
937 else if (size == 3) {
938 dotOp = OPCODE_DP3;
939 swizzle = SWIZZLE_XYZW;
940 }
941 else {
942 assert(size == 2);
943 dotOp = OPCODE_DP3; /* XXX use OPCODE_DP2 eventually */
944 swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y);
945 }
946
947 /* Compute inequality (temp = (A != B)) */
948 inst = emit_instruction(emitInfo,
949 OPCODE_SNE,
950 &tempStore,
951 n->Children[0]->Store,
952 n->Children[1]->Store,
953 NULL);
954 if (!inst) {
955 return NULL;
956 }
957 inst_comment(inst, "Compare values");
958
959 /* Compute val = DOT(temp, temp) (reduction) */
960 inst = emit_instruction(emitInfo,
961 dotOp,
962 n->Store,
963 &tempStore,
964 &tempStore,
965 NULL);
966 if (!inst) {
967 return NULL;
968 }
969 inst->SrcReg[0].Swizzle = inst->SrcReg[1].Swizzle = swizzle; /*override*/
970 inst_comment(inst, "Reduce vec to bool");
971
972 _slang_free_temp(emitInfo->vt, &tempStore); /* free temp */
973
974 if (n->Opcode == IR_EQUAL) {
975 /* compute val = !val.x with SEQ val, val, 0; */
976 slang_ir_storage zero;
977 constant_to_storage(emitInfo, 0.0, &zero);
978 inst = emit_instruction(emitInfo,
979 OPCODE_SEQ,
980 n->Store, /* dest */
981 n->Store,
982 &zero,
983 NULL);
984 if (!inst) {
985 return NULL;
986 }
987 inst_comment(inst, "Invert true/false");
988 }
989 }
990 else {
991 /* size > 4, struct or array compare.
992 * XXX this won't work reliably for structs with padding!!
993 */
994 GLint i, num = (n->Children[0]->Store->Size + 3) / 4;
995 slang_ir_storage accTemp, sneTemp;
996
997 if (!alloc_local_temp(emitInfo, &accTemp, 4))
998 return NULL;
999
1000 if (!alloc_local_temp(emitInfo, &sneTemp, 4))
1001 return NULL;
1002
1003 for (i = 0; i < num; i++) {
1004 slang_ir_storage srcStore0 = *n->Children[0]->Store;
1005 slang_ir_storage srcStore1 = *n->Children[1]->Store;
1006 srcStore0.Index += i;
1007 srcStore1.Index += i;
1008
1009 if (i == 0) {
1010 /* SNE accTemp, left[i], right[i] */
1011 inst = emit_instruction(emitInfo, OPCODE_SNE,
1012 &accTemp, /* dest */
1013 &srcStore0,
1014 &srcStore1,
1015 NULL);
1016 if (!inst) {
1017 return NULL;
1018 }
1019 inst_comment(inst, "Begin struct/array comparison");
1020 }
1021 else {
1022 /* SNE sneTemp, left[i], right[i] */
1023 inst = emit_instruction(emitInfo, OPCODE_SNE,
1024 &sneTemp, /* dest */
1025 &srcStore0,
1026 &srcStore1,
1027 NULL);
1028 if (!inst) {
1029 return NULL;
1030 }
1031 /* ADD accTemp, accTemp, sneTemp; # like logical-OR */
1032 inst = emit_instruction(emitInfo, OPCODE_ADD,
1033 &accTemp, /* dest */
1034 &accTemp,
1035 &sneTemp,
1036 NULL);
1037 if (!inst) {
1038 return NULL;
1039 }
1040 }
1041 }
1042
1043 /* compute accTemp.x || accTemp.y || accTemp.z || accTemp.w with DOT4 */
1044 inst = emit_instruction(emitInfo, OPCODE_DP4,
1045 n->Store,
1046 &accTemp,
1047 &accTemp,
1048 NULL);
1049 if (!inst) {
1050 return NULL;
1051 }
1052 inst_comment(inst, "End struct/array comparison");
1053
1054 if (n->Opcode == IR_EQUAL) {
1055 /* compute tmp.x = !tmp.x via tmp.x = (tmp.x == 0) */
1056 slang_ir_storage zero;
1057 constant_to_storage(emitInfo, 0.0, &zero);
1058 inst = emit_instruction(emitInfo, OPCODE_SEQ,
1059 n->Store, /* dest */
1060 n->Store,
1061 &zero,
1062 NULL);
1063 if (!inst) {
1064 return NULL;
1065 }
1066 inst_comment(inst, "Invert true/false");
1067 }
1068
1069 _slang_free_temp(emitInfo->vt, &accTemp);
1070 _slang_free_temp(emitInfo->vt, &sneTemp);
1071 }
1072
1073 /* free temps */
1074 free_node_storage(emitInfo->vt, n->Children[0]);
1075 free_node_storage(emitInfo->vt, n->Children[1]);
1076
1077 return inst;
1078 }
1079
1080
1081
1082 /**
1083 * Generate code for an IR_CLAMP instruction.
1084 */
1085 static struct prog_instruction *
1086 emit_clamp(slang_emit_info *emitInfo, slang_ir_node *n)
1087 {
1088 struct prog_instruction *inst;
1089 slang_ir_node tmpNode;
1090
1091 assert(n->Opcode == IR_CLAMP);
1092 /* ch[0] = value
1093 * ch[1] = min limit
1094 * ch[2] = max limit
1095 */
1096
1097 inst = emit(emitInfo, n->Children[0]);
1098
1099 /* If lower limit == 0.0 and upper limit == 1.0,
1100 * set prev instruction's SaturateMode field to SATURATE_ZERO_ONE.
1101 * Else,
1102 * emit OPCODE_MIN, OPCODE_MAX sequence.
1103 */
1104 #if 0
1105 /* XXX this isn't quite finished yet */
1106 if (n->Children[1]->Opcode == IR_FLOAT &&
1107 n->Children[1]->Value[0] == 0.0 &&
1108 n->Children[1]->Value[1] == 0.0 &&
1109 n->Children[1]->Value[2] == 0.0 &&
1110 n->Children[1]->Value[3] == 0.0 &&
1111 n->Children[2]->Opcode == IR_FLOAT &&
1112 n->Children[2]->Value[0] == 1.0 &&
1113 n->Children[2]->Value[1] == 1.0 &&
1114 n->Children[2]->Value[2] == 1.0 &&
1115 n->Children[2]->Value[3] == 1.0) {
1116 if (!inst) {
1117 inst = prev_instruction(prog);
1118 }
1119 if (inst && inst->Opcode != OPCODE_NOP) {
1120 /* and prev instruction's DstReg matches n->Children[0]->Store */
1121 inst->SaturateMode = SATURATE_ZERO_ONE;
1122 n->Store = n->Children[0]->Store;
1123 return inst;
1124 }
1125 }
1126 #endif
1127
1128 if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size))
1129 return NULL;
1130
1131 emit(emitInfo, n->Children[1]);
1132 emit(emitInfo, n->Children[2]);
1133
1134 /* Some GPUs don't allow reading from output registers. So if the
1135 * dest for this clamp() is an output reg, we can't use that reg for
1136 * the intermediate result. Use a temp register instead.
1137 */
1138 _mesa_bzero(&tmpNode, sizeof(tmpNode));
1139 if (!alloc_node_storage(emitInfo, &tmpNode, n->Store->Size)) {
1140 return NULL;
1141 }
1142
1143 /* tmp = max(ch[0], ch[1]) */
1144 inst = emit_instruction(emitInfo, OPCODE_MAX,
1145 tmpNode.Store, /* dest */
1146 n->Children[0]->Store,
1147 n->Children[1]->Store,
1148 NULL);
1149 if (!inst) {
1150 return NULL;
1151 }
1152
1153 /* n->dest = min(tmp, ch[2]) */
1154 inst = emit_instruction(emitInfo, OPCODE_MIN,
1155 n->Store, /* dest */
1156 tmpNode.Store,
1157 n->Children[2]->Store,
1158 NULL);
1159
1160 free_node_storage(emitInfo->vt, &tmpNode);
1161
1162 return inst;
1163 }
1164
1165
1166 static struct prog_instruction *
1167 emit_negation(slang_emit_info *emitInfo, slang_ir_node *n)
1168 {
1169 /* Implement as MOV dst, -src; */
1170 /* XXX we could look at the previous instruction and in some circumstances
1171 * modify it to accomplish the negation.
1172 */
1173 struct prog_instruction *inst;
1174
1175 emit(emitInfo, n->Children[0]);
1176
1177 if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size))
1178 return NULL;
1179
1180 inst = emit_instruction(emitInfo,
1181 OPCODE_MOV,
1182 n->Store, /* dest */
1183 n->Children[0]->Store,
1184 NULL,
1185 NULL);
1186 if (inst) {
1187 inst->SrcReg[0].Negate = NEGATE_XYZW;
1188 }
1189 return inst;
1190 }
1191
1192
1193 static struct prog_instruction *
1194 emit_label(slang_emit_info *emitInfo, const slang_ir_node *n)
1195 {
1196 assert(n->Label);
1197 #if 0
1198 /* XXX this fails in loop tail code - investigate someday */
1199 assert(_slang_label_get_location(n->Label) < 0);
1200 _slang_label_set_location(n->Label, emitInfo->prog->NumInstructions,
1201 emitInfo->prog);
1202 #else
1203 if (_slang_label_get_location(n->Label) < 0)
1204 _slang_label_set_location(n->Label, emitInfo->prog->NumInstructions,
1205 emitInfo->prog);
1206 #endif
1207 return NULL;
1208 }
1209
1210
1211 /**
1212 * Emit code for a function call.
1213 * Note that for each time a function is called, we emit the function's
1214 * body code again because the set of available registers may be different.
1215 */
1216 static struct prog_instruction *
1217 emit_fcall(slang_emit_info *emitInfo, slang_ir_node *n)
1218 {
1219 struct gl_program *progSave;
1220 struct prog_instruction *inst;
1221 GLuint subroutineId;
1222 GLuint maxInstSave;
1223
1224 assert(n->Opcode == IR_CALL);
1225 assert(n->Label);
1226
1227 /* save/push cur program */
1228 maxInstSave = emitInfo->MaxInstructions;
1229 progSave = emitInfo->prog;
1230
1231 emitInfo->prog = new_subroutine(emitInfo, &subroutineId);
1232 emitInfo->MaxInstructions = emitInfo->prog->NumInstructions;
1233
1234 _slang_label_set_location(n->Label, emitInfo->prog->NumInstructions,
1235 emitInfo->prog);
1236
1237 if (emitInfo->EmitBeginEndSub) {
1238 /* BGNSUB isn't a real instruction.
1239 * We require a label (i.e. "foobar:") though, if we're going to
1240 * print the program in the NV format. The BNGSUB instruction is
1241 * really just a NOP to attach the label to.
1242 */
1243 inst = new_instruction(emitInfo, OPCODE_BGNSUB);
1244 if (!inst) {
1245 return NULL;
1246 }
1247 inst_comment(inst, n->Label->Name);
1248 }
1249
1250 /* body of function: */
1251 emit(emitInfo, n->Children[0]);
1252 n->Store = n->Children[0]->Store;
1253
1254 /* add RET instruction now, if needed */
1255 inst = prev_instruction(emitInfo);
1256 if (inst && inst->Opcode != OPCODE_RET) {
1257 inst = new_instruction(emitInfo, OPCODE_RET);
1258 if (!inst) {
1259 return NULL;
1260 }
1261 }
1262
1263 if (emitInfo->EmitBeginEndSub) {
1264 inst = new_instruction(emitInfo, OPCODE_ENDSUB);
1265 if (!inst) {
1266 return NULL;
1267 }
1268 inst_comment(inst, n->Label->Name);
1269 }
1270
1271 /* pop/restore cur program */
1272 emitInfo->prog = progSave;
1273 emitInfo->MaxInstructions = maxInstSave;
1274
1275 /* emit the function call */
1276 inst = new_instruction(emitInfo, OPCODE_CAL);
1277 if (!inst) {
1278 return NULL;
1279 }
1280 /* The branch target is just the subroutine number (changed later) */
1281 inst->BranchTarget = subroutineId;
1282 inst_comment(inst, n->Label->Name);
1283 assert(inst->BranchTarget >= 0);
1284
1285 return inst;
1286 }
1287
1288
1289 /**
1290 * Emit code for a 'return' statement.
1291 */
1292 static struct prog_instruction *
1293 emit_return(slang_emit_info *emitInfo, slang_ir_node *n)
1294 {
1295 struct prog_instruction *inst;
1296 assert(n);
1297 assert(n->Opcode == IR_RETURN);
1298 assert(n->Label);
1299 inst = new_instruction(emitInfo, OPCODE_RET);
1300 if (inst) {
1301 inst->DstReg.CondMask = COND_TR; /* always return */
1302 }
1303 return inst;
1304 }
1305
1306
1307 static struct prog_instruction *
1308 emit_kill(slang_emit_info *emitInfo)
1309 {
1310 struct gl_fragment_program *fp;
1311 struct prog_instruction *inst;
1312 /* NV-KILL - discard fragment depending on condition code.
1313 * Note that ARB-KILL depends on sign of vector operand.
1314 */
1315 inst = new_instruction(emitInfo, OPCODE_KIL_NV);
1316 if (!inst) {
1317 return NULL;
1318 }
1319 inst->DstReg.CondMask = COND_TR; /* always kill */
1320
1321 assert(emitInfo->prog->Target == GL_FRAGMENT_PROGRAM_ARB);
1322 fp = (struct gl_fragment_program *) emitInfo->prog;
1323 fp->UsesKill = GL_TRUE;
1324
1325 return inst;
1326 }
1327
1328
1329 static struct prog_instruction *
1330 emit_tex(slang_emit_info *emitInfo, slang_ir_node *n)
1331 {
1332 struct prog_instruction *inst;
1333 gl_inst_opcode opcode;
1334 GLboolean shadow = GL_FALSE;
1335
1336 switch (n->Opcode) {
1337 case IR_TEX:
1338 opcode = OPCODE_TEX;
1339 break;
1340 case IR_TEX_SH:
1341 opcode = OPCODE_TEX;
1342 shadow = GL_TRUE;
1343 break;
1344 case IR_TEXB:
1345 opcode = OPCODE_TXB;
1346 break;
1347 case IR_TEXB_SH:
1348 opcode = OPCODE_TXB;
1349 shadow = GL_TRUE;
1350 break;
1351 case IR_TEXP:
1352 opcode = OPCODE_TXP;
1353 break;
1354 case IR_TEXP_SH:
1355 opcode = OPCODE_TXP;
1356 shadow = GL_TRUE;
1357 break;
1358 default:
1359 _mesa_problem(NULL, "Bad IR TEX code");
1360 return NULL;
1361 }
1362
1363 if (n->Children[0]->Opcode == IR_ELEMENT) {
1364 /* array is the sampler (a uniform which'll indicate the texture unit) */
1365 assert(n->Children[0]->Children[0]->Store);
1366 assert(n->Children[0]->Children[0]->Store->File == PROGRAM_SAMPLER);
1367
1368 emit(emitInfo, n->Children[0]);
1369
1370 n->Children[0]->Var = n->Children[0]->Children[0]->Var;
1371 } else {
1372 /* this is the sampler (a uniform which'll indicate the texture unit) */
1373 assert(n->Children[0]->Store);
1374 assert(n->Children[0]->Store->File == PROGRAM_SAMPLER);
1375 }
1376
1377 /* emit code for the texcoord operand */
1378 (void) emit(emitInfo, n->Children[1]);
1379
1380 /* alloc storage for result of texture fetch */
1381 if (!alloc_node_storage(emitInfo, n, 4))
1382 return NULL;
1383
1384 /* emit TEX instruction; Child[1] is the texcoord */
1385 inst = emit_instruction(emitInfo,
1386 opcode,
1387 n->Store,
1388 n->Children[1]->Store,
1389 NULL,
1390 NULL);
1391 if (!inst) {
1392 return NULL;
1393 }
1394
1395 inst->TexShadow = shadow;
1396
1397 /* Store->Index is the uniform/sampler index */
1398 assert(n->Children[0]->Store->Index >= 0);
1399 inst->TexSrcUnit = n->Children[0]->Store->Index;
1400 inst->TexSrcTarget = n->Children[0]->Store->TexTarget;
1401
1402 /* mark the sampler as being used */
1403 _mesa_use_uniform(emitInfo->prog->Parameters,
1404 (char *) n->Children[0]->Var->a_name);
1405
1406 return inst;
1407 }
1408
1409
1410 /**
1411 * Assignment/copy
1412 */
1413 static struct prog_instruction *
1414 emit_copy(slang_emit_info *emitInfo, slang_ir_node *n)
1415 {
1416 struct prog_instruction *inst;
1417
1418 assert(n->Opcode == IR_COPY);
1419
1420 /* lhs */
1421 emit(emitInfo, n->Children[0]);
1422 if (!n->Children[0]->Store || n->Children[0]->Store->Index < 0) {
1423 /* an error should have been already recorded */
1424 return NULL;
1425 }
1426
1427 /* rhs */
1428 assert(n->Children[1]);
1429 inst = emit(emitInfo, n->Children[1]);
1430
1431 if (!n->Children[1]->Store || n->Children[1]->Store->Index < 0) {
1432 if (!emitInfo->log->text && !emitInfo->UnresolvedFunctions) {
1433 /* XXX this error should have been caught in slang_codegen.c */
1434 slang_info_log_error(emitInfo->log, "invalid assignment");
1435 }
1436 return NULL;
1437 }
1438
1439 assert(n->Children[1]->Store->Index >= 0);
1440
1441 /*assert(n->Children[0]->Store->Size == n->Children[1]->Store->Size);*/
1442
1443 n->Store = n->Children[0]->Store;
1444
1445 if (n->Store->File == PROGRAM_SAMPLER) {
1446 /* no code generated for sampler assignments,
1447 * just copy the sampler index/target at compile time.
1448 */
1449 n->Store->Index = n->Children[1]->Store->Index;
1450 n->Store->TexTarget = n->Children[1]->Store->TexTarget;
1451 return NULL;
1452 }
1453
1454 #if PEEPHOLE_OPTIMIZATIONS
1455 if (inst &&
1456 (n->Children[1]->Opcode != IR_SWIZZLE) &&
1457 _slang_is_temp(emitInfo->vt, n->Children[1]->Store) &&
1458 (inst->DstReg.File == n->Children[1]->Store->File) &&
1459 (inst->DstReg.Index == n->Children[1]->Store->Index) &&
1460 !n->Children[0]->Store->IsIndirect &&
1461 n->Children[0]->Store->Size <= 4) {
1462 /* Peephole optimization:
1463 * The Right-Hand-Side has its results in a temporary place.
1464 * Modify the RHS (and the prev instruction) to store its results
1465 * in the destination specified by n->Children[0].
1466 * Then, this MOVE is a no-op.
1467 * Ex:
1468 * MUL tmp, x, y;
1469 * MOV a, tmp;
1470 * becomes:
1471 * MUL a, x, y;
1472 */
1473
1474 /* fixup the previous instruction (which stored the RHS result) */
1475 assert(n->Children[0]->Store->Index >= 0);
1476 storage_to_dst_reg(&inst->DstReg, n->Children[0]->Store);
1477 return inst;
1478 }
1479 else
1480 #endif
1481 {
1482 if (n->Children[0]->Store->Size > 4) {
1483 /* move matrix/struct etc (block of registers) */
1484 slang_ir_storage dstStore = *n->Children[0]->Store;
1485 slang_ir_storage srcStore = *n->Children[1]->Store;
1486 GLint size = srcStore.Size;
1487 ASSERT(n->Children[1]->Store->Swizzle == SWIZZLE_NOOP);
1488 dstStore.Size = 4;
1489 srcStore.Size = 4;
1490 while (size >= 4) {
1491 inst = emit_instruction(emitInfo, OPCODE_MOV,
1492 &dstStore,
1493 &srcStore,
1494 NULL,
1495 NULL);
1496 if (!inst) {
1497 return NULL;
1498 }
1499 inst_comment(inst, "IR_COPY block");
1500 srcStore.Index++;
1501 dstStore.Index++;
1502 size -= 4;
1503 }
1504 }
1505 else {
1506 /* single register move */
1507 char *srcAnnot, *dstAnnot;
1508 assert(n->Children[0]->Store->Index >= 0);
1509 inst = emit_instruction(emitInfo, OPCODE_MOV,
1510 n->Children[0]->Store, /* dest */
1511 n->Children[1]->Store,
1512 NULL,
1513 NULL);
1514 if (!inst) {
1515 return NULL;
1516 }
1517 dstAnnot = storage_annotation(n->Children[0], emitInfo->prog);
1518 srcAnnot = storage_annotation(n->Children[1], emitInfo->prog);
1519 inst->Comment = instruction_annotation(inst->Opcode, dstAnnot,
1520 srcAnnot, NULL, NULL);
1521 }
1522 free_node_storage(emitInfo->vt, n->Children[1]);
1523 return inst;
1524 }
1525 }
1526
1527
1528 /**
1529 * An IR_COND node wraps a boolean expression which is used by an
1530 * IF or WHILE test. This is where we'll set condition codes, if needed.
1531 */
1532 static struct prog_instruction *
1533 emit_cond(slang_emit_info *emitInfo, slang_ir_node *n)
1534 {
1535 struct prog_instruction *inst;
1536
1537 assert(n->Opcode == IR_COND);
1538
1539 if (!n->Children[0])
1540 return NULL;
1541
1542 /* emit code for the expression */
1543 inst = emit(emitInfo, n->Children[0]);
1544
1545 if (!n->Children[0]->Store) {
1546 /* error recovery */
1547 return NULL;
1548 }
1549
1550 assert(n->Children[0]->Store);
1551 /*assert(n->Children[0]->Store->Size == 1);*/
1552
1553 if (emitInfo->EmitCondCodes) {
1554 if (inst &&
1555 n->Children[0]->Store &&
1556 inst->DstReg.File == n->Children[0]->Store->File &&
1557 inst->DstReg.Index == n->Children[0]->Store->Index) {
1558 /* The previous instruction wrote to the register who's value
1559 * we're testing. Just fix that instruction so that the
1560 * condition codes are computed.
1561 */
1562 inst->CondUpdate = GL_TRUE;
1563 n->Store = n->Children[0]->Store;
1564 return inst;
1565 }
1566 else {
1567 /* This'll happen for things like "if (i) ..." where no code
1568 * is normally generated for the expression "i".
1569 * Generate a move instruction just to set condition codes.
1570 */
1571 if (!alloc_node_storage(emitInfo, n, 1))
1572 return NULL;
1573 inst = emit_instruction(emitInfo, OPCODE_MOV,
1574 n->Store, /* dest */
1575 n->Children[0]->Store,
1576 NULL,
1577 NULL);
1578 if (!inst) {
1579 return NULL;
1580 }
1581 inst->CondUpdate = GL_TRUE;
1582 inst_comment(inst, "COND expr");
1583 _slang_free_temp(emitInfo->vt, n->Store);
1584 return inst;
1585 }
1586 }
1587 else {
1588 /* No-op: the boolean result of the expression is in a regular reg */
1589 n->Store = n->Children[0]->Store;
1590 return inst;
1591 }
1592 }
1593
1594
1595 /**
1596 * Logical-NOT
1597 */
1598 static struct prog_instruction *
1599 emit_not(slang_emit_info *emitInfo, slang_ir_node *n)
1600 {
1601 static const struct {
1602 gl_inst_opcode op, opNot;
1603 } operators[] = {
1604 { OPCODE_SLT, OPCODE_SGE },
1605 { OPCODE_SLE, OPCODE_SGT },
1606 { OPCODE_SGT, OPCODE_SLE },
1607 { OPCODE_SGE, OPCODE_SLT },
1608 { OPCODE_SEQ, OPCODE_SNE },
1609 { OPCODE_SNE, OPCODE_SEQ },
1610 { 0, 0 }
1611 };
1612 struct prog_instruction *inst;
1613 slang_ir_storage zero;
1614 GLuint i;
1615
1616 /* child expr */
1617 inst = emit(emitInfo, n->Children[0]);
1618
1619 #if PEEPHOLE_OPTIMIZATIONS
1620 if (inst) {
1621 /* if the prev instruction was a comparison instruction, invert it */
1622 for (i = 0; operators[i].op; i++) {
1623 if (inst->Opcode == operators[i].op) {
1624 inst->Opcode = operators[i].opNot;
1625 n->Store = n->Children[0]->Store;
1626 return inst;
1627 }
1628 }
1629 }
1630 #endif
1631
1632 /* else, invert using SEQ (v = v == 0) */
1633 if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size))
1634 return NULL;
1635
1636 constant_to_storage(emitInfo, 0.0, &zero);
1637 inst = emit_instruction(emitInfo,
1638 OPCODE_SEQ,
1639 n->Store,
1640 n->Children[0]->Store,
1641 &zero,
1642 NULL);
1643 if (!inst) {
1644 return NULL;
1645 }
1646 inst_comment(inst, "NOT");
1647
1648 free_node_storage(emitInfo->vt, n->Children[0]);
1649
1650 return inst;
1651 }
1652
1653
1654 static struct prog_instruction *
1655 emit_if(slang_emit_info *emitInfo, slang_ir_node *n)
1656 {
1657 struct gl_program *prog = emitInfo->prog;
1658 GLuint ifInstLoc, elseInstLoc = 0;
1659 GLuint condWritemask = 0;
1660
1661 /* emit condition expression code */
1662 {
1663 struct prog_instruction *inst;
1664 inst = emit(emitInfo, n->Children[0]);
1665 if (emitInfo->EmitCondCodes) {
1666 if (!inst) {
1667 /* error recovery */
1668 return NULL;
1669 }
1670 condWritemask = inst->DstReg.WriteMask;
1671 }
1672 }
1673
1674 if (!n->Children[0]->Store)
1675 return NULL;
1676
1677 #if 0
1678 assert(n->Children[0]->Store->Size == 1); /* a bool! */
1679 #endif
1680
1681 ifInstLoc = prog->NumInstructions;
1682 if (emitInfo->EmitHighLevelInstructions) {
1683 if (emitInfo->EmitCondCodes) {
1684 /* IF condcode THEN ... */
1685 struct prog_instruction *ifInst = new_instruction(emitInfo, OPCODE_IF);
1686 if (!ifInst) {
1687 return NULL;
1688 }
1689 ifInst->DstReg.CondMask = COND_NE; /* if cond is non-zero */
1690 /* only test the cond code (1 of 4) that was updated by the
1691 * previous instruction.
1692 */
1693 ifInst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1694 }
1695 else {
1696 struct prog_instruction *inst;
1697
1698 /* IF src[0] THEN ... */
1699 inst = emit_instruction(emitInfo, OPCODE_IF,
1700 NULL, /* dst */
1701 n->Children[0]->Store, /* op0 */
1702 NULL,
1703 NULL);
1704 if (!inst) {
1705 return NULL;
1706 }
1707 }
1708 }
1709 else {
1710 /* conditional jump to else, or endif */
1711 struct prog_instruction *ifInst = new_instruction(emitInfo, OPCODE_BRA);
1712 if (!ifInst) {
1713 return NULL;
1714 }
1715 ifInst->DstReg.CondMask = COND_EQ; /* BRA if cond is zero */
1716 inst_comment(ifInst, "if zero");
1717 ifInst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1718 }
1719
1720 /* if body */
1721 emit(emitInfo, n->Children[1]);
1722
1723 if (n->Children[2]) {
1724 /* have else body */
1725 elseInstLoc = prog->NumInstructions;
1726 if (emitInfo->EmitHighLevelInstructions) {
1727 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_ELSE);
1728 if (!inst) {
1729 return NULL;
1730 }
1731 }
1732 else {
1733 /* jump to endif instruction */
1734 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_BRA);
1735 if (!inst) {
1736 return NULL;
1737 }
1738 inst_comment(inst, "else");
1739 inst->DstReg.CondMask = COND_TR; /* always branch */
1740 }
1741 prog->Instructions[ifInstLoc].BranchTarget = prog->NumInstructions;
1742 emit(emitInfo, n->Children[2]);
1743 }
1744 else {
1745 /* no else body */
1746 prog->Instructions[ifInstLoc].BranchTarget = prog->NumInstructions;
1747 }
1748
1749 if (emitInfo->EmitHighLevelInstructions) {
1750 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_ENDIF);
1751 if (!inst) {
1752 return NULL;
1753 }
1754 }
1755
1756 if (n->Children[2]) {
1757 prog->Instructions[elseInstLoc].BranchTarget = prog->NumInstructions;
1758 }
1759 return NULL;
1760 }
1761
1762
1763 static struct prog_instruction *
1764 emit_loop(slang_emit_info *emitInfo, slang_ir_node *n)
1765 {
1766 struct gl_program *prog = emitInfo->prog;
1767 struct prog_instruction *endInst;
1768 GLuint beginInstLoc, tailInstLoc, endInstLoc;
1769 slang_ir_node *ir;
1770
1771 /* emit OPCODE_BGNLOOP */
1772 beginInstLoc = prog->NumInstructions;
1773 if (emitInfo->EmitHighLevelInstructions) {
1774 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_BGNLOOP);
1775 if (!inst) {
1776 return NULL;
1777 }
1778 }
1779
1780 /* body */
1781 emit(emitInfo, n->Children[0]);
1782
1783 /* tail */
1784 tailInstLoc = prog->NumInstructions;
1785 if (n->Children[1]) {
1786 if (emitInfo->EmitComments)
1787 emit_comment(emitInfo, "Loop tail code:");
1788 emit(emitInfo, n->Children[1]);
1789 }
1790
1791 endInstLoc = prog->NumInstructions;
1792 if (emitInfo->EmitHighLevelInstructions) {
1793 /* emit OPCODE_ENDLOOP */
1794 endInst = new_instruction(emitInfo, OPCODE_ENDLOOP);
1795 if (!endInst) {
1796 return NULL;
1797 }
1798 }
1799 else {
1800 /* emit unconditional BRA-nch */
1801 endInst = new_instruction(emitInfo, OPCODE_BRA);
1802 if (!endInst) {
1803 return NULL;
1804 }
1805 endInst->DstReg.CondMask = COND_TR; /* always true */
1806 }
1807 /* ENDLOOP's BranchTarget points to the BGNLOOP inst */
1808 endInst->BranchTarget = beginInstLoc;
1809
1810 if (emitInfo->EmitHighLevelInstructions) {
1811 /* BGNLOOP's BranchTarget points to the ENDLOOP inst */
1812 prog->Instructions[beginInstLoc].BranchTarget = prog->NumInstructions -1;
1813 }
1814
1815 /* Done emitting loop code. Now walk over the loop's linked list of
1816 * BREAK and CONT nodes, filling in their BranchTarget fields (which
1817 * will point to the ENDLOOP+1 or BGNLOOP instructions, respectively).
1818 */
1819 for (ir = n->List; ir; ir = ir->List) {
1820 struct prog_instruction *inst = prog->Instructions + ir->InstLocation;
1821 assert(inst->BranchTarget < 0);
1822 if (ir->Opcode == IR_BREAK ||
1823 ir->Opcode == IR_BREAK_IF_TRUE) {
1824 assert(inst->Opcode == OPCODE_BRK ||
1825 inst->Opcode == OPCODE_BRA);
1826 /* go to instruction after end of loop */
1827 inst->BranchTarget = endInstLoc + 1;
1828 }
1829 else {
1830 assert(ir->Opcode == IR_CONT ||
1831 ir->Opcode == IR_CONT_IF_TRUE);
1832 assert(inst->Opcode == OPCODE_CONT ||
1833 inst->Opcode == OPCODE_BRA);
1834 /* go to instruction at tail of loop */
1835 inst->BranchTarget = endInstLoc;
1836 }
1837 }
1838 return NULL;
1839 }
1840
1841
1842 /**
1843 * Unconditional "continue" or "break" statement.
1844 * Either OPCODE_CONT, OPCODE_BRK or OPCODE_BRA will be emitted.
1845 */
1846 static struct prog_instruction *
1847 emit_cont_break(slang_emit_info *emitInfo, slang_ir_node *n)
1848 {
1849 gl_inst_opcode opcode;
1850 struct prog_instruction *inst;
1851
1852 if (n->Opcode == IR_CONT) {
1853 /* we need to execute the loop's tail code before doing CONT */
1854 assert(n->Parent);
1855 assert(n->Parent->Opcode == IR_LOOP);
1856 if (n->Parent->Children[1]) {
1857 /* emit tail code */
1858 if (emitInfo->EmitComments) {
1859 emit_comment(emitInfo, "continue - tail code:");
1860 }
1861 emit(emitInfo, n->Parent->Children[1]);
1862 }
1863 }
1864
1865 /* opcode selection */
1866 if (emitInfo->EmitHighLevelInstructions) {
1867 opcode = (n->Opcode == IR_CONT) ? OPCODE_CONT : OPCODE_BRK;
1868 }
1869 else {
1870 opcode = OPCODE_BRA;
1871 }
1872 n->InstLocation = emitInfo->prog->NumInstructions;
1873 inst = new_instruction(emitInfo, opcode);
1874 if (inst) {
1875 inst->DstReg.CondMask = COND_TR; /* always true */
1876 }
1877 return inst;
1878 }
1879
1880
1881 /**
1882 * Conditional "continue" or "break" statement.
1883 * Either OPCODE_CONT, OPCODE_BRK or OPCODE_BRA will be emitted.
1884 */
1885 static struct prog_instruction *
1886 emit_cont_break_if_true(slang_emit_info *emitInfo, slang_ir_node *n)
1887 {
1888 struct prog_instruction *inst;
1889
1890 assert(n->Opcode == IR_CONT_IF_TRUE ||
1891 n->Opcode == IR_BREAK_IF_TRUE);
1892
1893 /* evaluate condition expr, setting cond codes */
1894 inst = emit(emitInfo, n->Children[0]);
1895 if (emitInfo->EmitCondCodes) {
1896 assert(inst);
1897 inst->CondUpdate = GL_TRUE;
1898 }
1899
1900 n->InstLocation = emitInfo->prog->NumInstructions;
1901
1902 /* opcode selection */
1903 if (emitInfo->EmitHighLevelInstructions) {
1904 const gl_inst_opcode opcode
1905 = (n->Opcode == IR_CONT_IF_TRUE) ? OPCODE_CONT : OPCODE_BRK;
1906 if (emitInfo->EmitCondCodes) {
1907 /* Get the writemask from the previous instruction which set
1908 * the condcodes. Use that writemask as the CondSwizzle.
1909 */
1910 const GLuint condWritemask = inst->DstReg.WriteMask;
1911 inst = new_instruction(emitInfo, opcode);
1912 if (inst) {
1913 inst->DstReg.CondMask = COND_NE;
1914 inst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1915 }
1916 return inst;
1917 }
1918 else {
1919 /* IF reg
1920 * BRK/CONT;
1921 * ENDIF
1922 */
1923 GLint ifInstLoc;
1924 ifInstLoc = emitInfo->prog->NumInstructions;
1925 inst = emit_instruction(emitInfo, OPCODE_IF,
1926 NULL, /* dest */
1927 n->Children[0]->Store,
1928 NULL,
1929 NULL);
1930 if (!inst) {
1931 return NULL;
1932 }
1933 n->InstLocation = emitInfo->prog->NumInstructions;
1934
1935 inst = new_instruction(emitInfo, opcode);
1936 if (!inst) {
1937 return NULL;
1938 }
1939 inst = new_instruction(emitInfo, OPCODE_ENDIF);
1940 if (!inst) {
1941 return NULL;
1942 }
1943
1944 emitInfo->prog->Instructions[ifInstLoc].BranchTarget
1945 = emitInfo->prog->NumInstructions;
1946 return inst;
1947 }
1948 }
1949 else {
1950 const GLuint condWritemask = inst->DstReg.WriteMask;
1951 assert(emitInfo->EmitCondCodes);
1952 inst = new_instruction(emitInfo, OPCODE_BRA);
1953 if (inst) {
1954 inst->DstReg.CondMask = COND_NE;
1955 inst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1956 }
1957 return inst;
1958 }
1959 }
1960
1961
1962 /**
1963 * Return the size of a swizzle mask given that some swizzle components
1964 * may be NIL/undefined. For example:
1965 * swizzle_size(".zzxx") = 4
1966 * swizzle_size(".xy??") = 2
1967 * swizzle_size(".w???") = 1
1968 */
1969 static GLuint
1970 swizzle_size(GLuint swizzle)
1971 {
1972 GLuint i;
1973 for (i = 0; i < 4; i++) {
1974 if (GET_SWZ(swizzle, i) == SWIZZLE_NIL)
1975 return i;
1976 }
1977 return 4;
1978 }
1979
1980
1981 static struct prog_instruction *
1982 emit_swizzle(slang_emit_info *emitInfo, slang_ir_node *n)
1983 {
1984 struct prog_instruction *inst;
1985
1986 inst = emit(emitInfo, n->Children[0]);
1987
1988 if (!n->Store->Parent) {
1989 /* this covers a case such as "(b ? p : q).x" */
1990 n->Store->Parent = n->Children[0]->Store;
1991 assert(n->Store->Parent);
1992 }
1993
1994 {
1995 const GLuint swizzle = n->Store->Swizzle;
1996 /* new storage is parent storage with updated Swizzle + Size fields */
1997 _slang_copy_ir_storage(n->Store, n->Store->Parent);
1998 /* Apply this node's swizzle to parent's storage */
1999 n->Store->Swizzle = _slang_swizzle_swizzle(n->Store->Swizzle, swizzle);
2000 /* Update size */
2001 n->Store->Size = swizzle_size(n->Store->Swizzle);
2002 }
2003
2004 assert(!n->Store->Parent);
2005 assert(n->Store->Index >= 0);
2006
2007 return inst;
2008 }
2009
2010
2011 /**
2012 * Dereference array element: element == array[index]
2013 * This basically involves emitting code for computing the array index
2014 * and updating the node/element's storage info.
2015 */
2016 static struct prog_instruction *
2017 emit_array_element(slang_emit_info *emitInfo, slang_ir_node *n)
2018 {
2019 slang_ir_storage *arrayStore, *indexStore;
2020 const int elemSize = n->Store->Size; /* number of floats */
2021 const GLint elemSizeVec = (elemSize + 3) / 4; /* number of vec4 */
2022 struct prog_instruction *inst;
2023
2024 assert(n->Opcode == IR_ELEMENT);
2025 assert(elemSize > 0);
2026
2027 /* special case for built-in state variables, like light state */
2028 {
2029 slang_ir_storage *root = n->Store;
2030 assert(!root->Parent);
2031 while (root->Parent)
2032 root = root->Parent;
2033
2034 if (root->File == PROGRAM_STATE_VAR) {
2035 GLboolean direct;
2036 GLint index =
2037 _slang_alloc_statevar(n, emitInfo->prog->Parameters, &direct);
2038 if (index < 0) {
2039 /* error */
2040 return NULL;
2041 }
2042 if (direct) {
2043 n->Store->Index = index;
2044 return NULL; /* all done */
2045 }
2046 }
2047 }
2048
2049 /* do codegen for array itself */
2050 emit(emitInfo, n->Children[0]);
2051 arrayStore = n->Children[0]->Store;
2052
2053 /* The initial array element storage is the array's storage,
2054 * then modified below.
2055 */
2056 _slang_copy_ir_storage(n->Store, arrayStore);
2057
2058
2059 if (n->Children[1]->Opcode == IR_FLOAT) {
2060 /* Constant array index */
2061 const GLint element = (GLint) n->Children[1]->Value[0];
2062
2063 /* this element's storage is the array's storage, plus constant offset */
2064 n->Store->Index += elemSizeVec * element;
2065 }
2066 else {
2067 /* Variable array index */
2068
2069 /* do codegen for array index expression */
2070 emit(emitInfo, n->Children[1]);
2071 indexStore = n->Children[1]->Store;
2072
2073 if (indexStore->IsIndirect) {
2074 /* need to put the array index into a temporary since we can't
2075 * directly support a[b[i]] constructs.
2076 */
2077
2078
2079 /*indexStore = tempstore();*/
2080 }
2081
2082
2083 if (elemSize > 4) {
2084 /* need to multiply array index by array element size */
2085 struct prog_instruction *inst;
2086 slang_ir_storage *indexTemp;
2087 slang_ir_storage elemSizeStore;
2088
2089 /* allocate 1 float indexTemp */
2090 indexTemp = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, 1);
2091 _slang_alloc_temp(emitInfo->vt, indexTemp);
2092
2093 /* allocate a constant containing the element size */
2094 constant_to_storage(emitInfo, (float) elemSizeVec, &elemSizeStore);
2095
2096 /* multiply array index by element size */
2097 inst = emit_instruction(emitInfo,
2098 OPCODE_MUL,
2099 indexTemp, /* dest */
2100 indexStore, /* the index */
2101 &elemSizeStore,
2102 NULL);
2103 if (!inst) {
2104 return NULL;
2105 }
2106
2107 indexStore = indexTemp;
2108 }
2109
2110 if (arrayStore->IsIndirect) {
2111 /* ex: in a[i][j], a[i] (the arrayStore) is indirect */
2112 /* Need to add indexStore to arrayStore->Indirect store */
2113 slang_ir_storage indirectArray;
2114 slang_ir_storage *indexTemp;
2115
2116 _slang_init_ir_storage(&indirectArray,
2117 arrayStore->IndirectFile,
2118 arrayStore->IndirectIndex,
2119 1,
2120 arrayStore->IndirectSwizzle);
2121
2122 /* allocate 1 float indexTemp */
2123 indexTemp = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, 1);
2124 _slang_alloc_temp(emitInfo->vt, indexTemp);
2125
2126 inst = emit_instruction(emitInfo,
2127 OPCODE_ADD,
2128 indexTemp, /* dest */
2129 indexStore, /* the index */
2130 &indirectArray, /* indirect array base */
2131 NULL);
2132 if (!inst) {
2133 return NULL;
2134 }
2135
2136 indexStore = indexTemp;
2137 }
2138
2139 /* update the array element storage info */
2140 n->Store->IsIndirect = GL_TRUE;
2141 n->Store->IndirectFile = indexStore->File;
2142 n->Store->IndirectIndex = indexStore->Index;
2143 n->Store->IndirectSwizzle = indexStore->Swizzle;
2144 }
2145
2146 n->Store->Size = elemSize;
2147 n->Store->Swizzle = _slang_var_swizzle(elemSize, 0);
2148
2149 return NULL; /* no instruction */
2150 }
2151
2152
2153 /**
2154 * Resolve storage for accessing a structure field.
2155 */
2156 static struct prog_instruction *
2157 emit_struct_field(slang_emit_info *emitInfo, slang_ir_node *n)
2158 {
2159 slang_ir_storage *root = n->Store;
2160 GLint fieldOffset, fieldSize;
2161
2162 assert(n->Opcode == IR_FIELD);
2163
2164 assert(!root->Parent);
2165 while (root->Parent)
2166 root = root->Parent;
2167
2168 /* If this is the field of a state var, allocate constant/uniform
2169 * storage for it now if we haven't already.
2170 * Note that we allocate storage (uniform/constant slots) for state
2171 * variables here rather than at declaration time so we only allocate
2172 * space for the ones that we actually use!
2173 */
2174 if (root->File == PROGRAM_STATE_VAR) {
2175 GLboolean direct;
2176 GLint index = _slang_alloc_statevar(n, emitInfo->prog->Parameters, &direct);
2177 if (index < 0) {
2178 slang_info_log_error(emitInfo->log, "Error parsing state variable");
2179 return NULL;
2180 }
2181 if (direct) {
2182 root->Index = index;
2183 return NULL; /* all done */
2184 }
2185 }
2186
2187 /* do codegen for struct */
2188 emit(emitInfo, n->Children[0]);
2189 assert(n->Children[0]->Store->Index >= 0);
2190
2191
2192 fieldOffset = n->Store->Index;
2193 fieldSize = n->Store->Size;
2194
2195 _slang_copy_ir_storage(n->Store, n->Children[0]->Store);
2196
2197 n->Store->Index = n->Children[0]->Store->Index + fieldOffset / 4;
2198 n->Store->Size = fieldSize;
2199
2200 switch (fieldSize) {
2201 case 1:
2202 {
2203 GLint swz = fieldOffset % 4;
2204 n->Store->Swizzle = MAKE_SWIZZLE4(swz, swz, swz, swz);
2205 }
2206 break;
2207 case 2:
2208 n->Store->Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
2209 SWIZZLE_NIL, SWIZZLE_NIL);
2210 break;
2211 case 3:
2212 n->Store->Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
2213 SWIZZLE_Z, SWIZZLE_NIL);
2214 break;
2215 default:
2216 n->Store->Swizzle = SWIZZLE_XYZW;
2217 }
2218
2219 assert(n->Store->Index >= 0);
2220
2221 return NULL; /* no instruction */
2222 }
2223
2224
2225 /**
2226 * Emit code for a variable declaration.
2227 * This usually doesn't result in any code generation, but just
2228 * memory allocation.
2229 */
2230 static struct prog_instruction *
2231 emit_var_decl(slang_emit_info *emitInfo, slang_ir_node *n)
2232 {
2233 assert(n->Store);
2234 assert(n->Store->File != PROGRAM_UNDEFINED);
2235 assert(n->Store->Size > 0);
2236 /*assert(n->Store->Index < 0);*/
2237
2238 if (!n->Var || n->Var->isTemp) {
2239 /* a nameless/temporary variable, will be freed after first use */
2240 /*NEW*/
2241 if (n->Store->Index < 0 && !_slang_alloc_temp(emitInfo->vt, n->Store)) {
2242 slang_info_log_error(emitInfo->log,
2243 "Ran out of registers, too many temporaries");
2244 return NULL;
2245 }
2246 }
2247 else {
2248 /* a regular variable */
2249 _slang_add_variable(emitInfo->vt, n->Var);
2250 if (!_slang_alloc_var(emitInfo->vt, n->Store)) {
2251 slang_info_log_error(emitInfo->log,
2252 "Ran out of registers, too many variables");
2253 return NULL;
2254 }
2255 /*
2256 printf("IR_VAR_DECL %s %d store %p\n",
2257 (char*) n->Var->a_name, n->Store->Index, (void*) n->Store);
2258 */
2259 assert(n->Var->store == n->Store);
2260 }
2261 if (emitInfo->EmitComments) {
2262 /* emit NOP with comment describing the variable's storage location */
2263 char s[1000];
2264 sprintf(s, "TEMP[%d]%s = variable %s (size %d)",
2265 n->Store->Index,
2266 _mesa_swizzle_string(n->Store->Swizzle, 0, GL_FALSE),
2267 (n->Var ? (char *) n->Var->a_name : "anonymous"),
2268 n->Store->Size);
2269 emit_comment(emitInfo, s);
2270 }
2271 return NULL;
2272 }
2273
2274
2275 /**
2276 * Emit code for a reference to a variable.
2277 * Actually, no code is generated but we may do some memory allocation.
2278 * In particular, state vars (uniforms) are allocated on an as-needed basis.
2279 */
2280 static struct prog_instruction *
2281 emit_var_ref(slang_emit_info *emitInfo, slang_ir_node *n)
2282 {
2283 assert(n->Store);
2284 assert(n->Store->File != PROGRAM_UNDEFINED);
2285
2286 if (n->Store->File == PROGRAM_STATE_VAR && n->Store->Index < 0) {
2287 GLboolean direct;
2288 GLint index = _slang_alloc_statevar(n, emitInfo->prog->Parameters, &direct);
2289 if (index < 0) {
2290 /* error */
2291 char s[100];
2292 /* XXX isn't this really an out of memory/resources error? */
2293 _mesa_snprintf(s, sizeof(s), "Undefined variable '%s'",
2294 (char *) n->Var->a_name);
2295 slang_info_log_error(emitInfo->log, s);
2296 return NULL;
2297 }
2298
2299 n->Store->Index = index;
2300 }
2301 else if (n->Store->File == PROGRAM_UNIFORM ||
2302 n->Store->File == PROGRAM_SAMPLER) {
2303 /* mark var as used */
2304 _mesa_use_uniform(emitInfo->prog->Parameters, (char *) n->Var->a_name);
2305 }
2306 else if (n->Store->File == PROGRAM_INPUT) {
2307 assert(n->Store->Index >= 0);
2308 emitInfo->prog->InputsRead |= (1 << n->Store->Index);
2309 }
2310
2311 if (n->Store->Index < 0) {
2312 /* probably ran out of registers */
2313 return NULL;
2314 }
2315 assert(n->Store->Size > 0);
2316
2317 return NULL;
2318 }
2319
2320
2321 static struct prog_instruction *
2322 emit(slang_emit_info *emitInfo, slang_ir_node *n)
2323 {
2324 struct prog_instruction *inst;
2325 if (!n)
2326 return NULL;
2327
2328 if (emitInfo->log->error_flag) {
2329 return NULL;
2330 }
2331
2332 if (n->Comment) {
2333 inst = new_instruction(emitInfo, OPCODE_NOP);
2334 if (inst) {
2335 inst->Comment = _mesa_strdup(n->Comment);
2336 }
2337 inst = NULL;
2338 }
2339
2340 switch (n->Opcode) {
2341 case IR_SEQ:
2342 /* sequence of two sub-trees */
2343 assert(n->Children[0]);
2344 assert(n->Children[1]);
2345 emit(emitInfo, n->Children[0]);
2346 if (emitInfo->log->error_flag)
2347 return NULL;
2348 inst = emit(emitInfo, n->Children[1]);
2349 #if 0
2350 assert(!n->Store);
2351 #endif
2352 n->Store = n->Children[1]->Store;
2353 return inst;
2354
2355 case IR_SCOPE:
2356 /* new variable scope */
2357 _slang_push_var_table(emitInfo->vt);
2358 inst = emit(emitInfo, n->Children[0]);
2359 _slang_pop_var_table(emitInfo->vt);
2360 return inst;
2361
2362 case IR_VAR_DECL:
2363 /* Variable declaration - allocate a register for it */
2364 inst = emit_var_decl(emitInfo, n);
2365 return inst;
2366
2367 case IR_VAR:
2368 /* Reference to a variable
2369 * Storage should have already been resolved/allocated.
2370 */
2371 return emit_var_ref(emitInfo, n);
2372
2373 case IR_ELEMENT:
2374 return emit_array_element(emitInfo, n);
2375 case IR_FIELD:
2376 return emit_struct_field(emitInfo, n);
2377 case IR_SWIZZLE:
2378 return emit_swizzle(emitInfo, n);
2379
2380 /* Simple arithmetic */
2381 /* unary */
2382 case IR_MOVE:
2383 case IR_RSQ:
2384 case IR_RCP:
2385 case IR_FLOOR:
2386 case IR_FRAC:
2387 case IR_F_TO_I:
2388 case IR_I_TO_F:
2389 case IR_ABS:
2390 case IR_SIN:
2391 case IR_COS:
2392 case IR_DDX:
2393 case IR_DDY:
2394 case IR_EXP:
2395 case IR_EXP2:
2396 case IR_LOG2:
2397 case IR_NOISE1:
2398 case IR_NOISE2:
2399 case IR_NOISE3:
2400 case IR_NOISE4:
2401 case IR_NRM4:
2402 case IR_NRM3:
2403 /* binary */
2404 case IR_ADD:
2405 case IR_SUB:
2406 case IR_MUL:
2407 case IR_DOT4:
2408 case IR_DOT3:
2409 case IR_DOT2:
2410 case IR_CROSS:
2411 case IR_MIN:
2412 case IR_MAX:
2413 case IR_SEQUAL:
2414 case IR_SNEQUAL:
2415 case IR_SGE:
2416 case IR_SGT:
2417 case IR_SLE:
2418 case IR_SLT:
2419 case IR_POW:
2420 /* trinary operators */
2421 case IR_LRP:
2422 case IR_CMP:
2423 return emit_arith(emitInfo, n);
2424
2425 case IR_EQUAL:
2426 case IR_NOTEQUAL:
2427 return emit_compare(emitInfo, n);
2428
2429 case IR_CLAMP:
2430 return emit_clamp(emitInfo, n);
2431 case IR_TEX:
2432 case IR_TEXB:
2433 case IR_TEXP:
2434 case IR_TEX_SH:
2435 case IR_TEXB_SH:
2436 case IR_TEXP_SH:
2437 return emit_tex(emitInfo, n);
2438 case IR_NEG:
2439 return emit_negation(emitInfo, n);
2440 case IR_FLOAT:
2441 /* find storage location for this float constant */
2442 n->Store->Index = _mesa_add_unnamed_constant(emitInfo->prog->Parameters,
2443 n->Value,
2444 n->Store->Size,
2445 &n->Store->Swizzle);
2446 if (n->Store->Index < 0) {
2447 slang_info_log_error(emitInfo->log, "Ran out of space for constants");
2448 return NULL;
2449 }
2450 return NULL;
2451
2452 case IR_COPY:
2453 return emit_copy(emitInfo, n);
2454
2455 case IR_COND:
2456 return emit_cond(emitInfo, n);
2457
2458 case IR_NOT:
2459 return emit_not(emitInfo, n);
2460
2461 case IR_LABEL:
2462 return emit_label(emitInfo, n);
2463
2464 case IR_KILL:
2465 return emit_kill(emitInfo);
2466
2467 case IR_CALL:
2468 /* new variable scope for subroutines/function calls */
2469 _slang_push_var_table(emitInfo->vt);
2470 inst = emit_fcall(emitInfo, n);
2471 _slang_pop_var_table(emitInfo->vt);
2472 return inst;
2473
2474 case IR_IF:
2475 return emit_if(emitInfo, n);
2476
2477 case IR_LOOP:
2478 return emit_loop(emitInfo, n);
2479 case IR_BREAK_IF_TRUE:
2480 case IR_CONT_IF_TRUE:
2481 return emit_cont_break_if_true(emitInfo, n);
2482 case IR_BREAK:
2483 /* fall-through */
2484 case IR_CONT:
2485 return emit_cont_break(emitInfo, n);
2486
2487 case IR_BEGIN_SUB:
2488 return new_instruction(emitInfo, OPCODE_BGNSUB);
2489 case IR_END_SUB:
2490 return new_instruction(emitInfo, OPCODE_ENDSUB);
2491 case IR_RETURN:
2492 return emit_return(emitInfo, n);
2493
2494 case IR_NOP:
2495 return NULL;
2496
2497 default:
2498 _mesa_problem(NULL, "Unexpected IR opcode in emit()\n");
2499 }
2500 return NULL;
2501 }
2502
2503
2504 /**
2505 * After code generation, any subroutines will be in separate program
2506 * objects. This function appends all the subroutines onto the main
2507 * program and resolves the linking of all the branch/call instructions.
2508 * XXX this logic should really be part of the linking process...
2509 */
2510 static void
2511 _slang_resolve_subroutines(slang_emit_info *emitInfo)
2512 {
2513 GET_CURRENT_CONTEXT(ctx);
2514 struct gl_program *mainP = emitInfo->prog;
2515 GLuint *subroutineLoc, i, total;
2516
2517 subroutineLoc
2518 = (GLuint *) _mesa_malloc(emitInfo->NumSubroutines * sizeof(GLuint));
2519
2520 /* total number of instructions */
2521 total = mainP->NumInstructions;
2522 for (i = 0; i < emitInfo->NumSubroutines; i++) {
2523 subroutineLoc[i] = total;
2524 total += emitInfo->Subroutines[i]->NumInstructions;
2525 }
2526
2527 /* adjust BranchTargets within the functions */
2528 for (i = 0; i < emitInfo->NumSubroutines; i++) {
2529 struct gl_program *sub = emitInfo->Subroutines[i];
2530 GLuint j;
2531 for (j = 0; j < sub->NumInstructions; j++) {
2532 struct prog_instruction *inst = sub->Instructions + j;
2533 if (inst->Opcode != OPCODE_CAL && inst->BranchTarget >= 0) {
2534 inst->BranchTarget += subroutineLoc[i];
2535 }
2536 }
2537 }
2538
2539 /* append subroutines' instructions after main's instructions */
2540 mainP->Instructions = _mesa_realloc_instructions(mainP->Instructions,
2541 mainP->NumInstructions,
2542 total);
2543 mainP->NumInstructions = total;
2544 for (i = 0; i < emitInfo->NumSubroutines; i++) {
2545 struct gl_program *sub = emitInfo->Subroutines[i];
2546 _mesa_copy_instructions(mainP->Instructions + subroutineLoc[i],
2547 sub->Instructions,
2548 sub->NumInstructions);
2549 /* delete subroutine code */
2550 sub->Parameters = NULL; /* prevent double-free */
2551 _mesa_reference_program(ctx, &emitInfo->Subroutines[i], NULL);
2552 }
2553
2554 /* free subroutine list */
2555 if (emitInfo->Subroutines) {
2556 _mesa_free(emitInfo->Subroutines);
2557 emitInfo->Subroutines = NULL;
2558 }
2559 emitInfo->NumSubroutines = 0;
2560
2561 /* Examine CAL instructions.
2562 * At this point, the BranchTarget field of the CAL instruction is
2563 * the number/id of the subroutine to call (an index into the
2564 * emitInfo->Subroutines list).
2565 * Translate that into an actual instruction location now.
2566 */
2567 for (i = 0; i < mainP->NumInstructions; i++) {
2568 struct prog_instruction *inst = mainP->Instructions + i;
2569 if (inst->Opcode == OPCODE_CAL) {
2570 const GLuint f = inst->BranchTarget;
2571 inst->BranchTarget = subroutineLoc[f];
2572 }
2573 }
2574
2575 _mesa_free(subroutineLoc);
2576 }
2577
2578
2579
2580 /**
2581 * Convert the IR tree into GPU instructions.
2582 * \param n root of IR tree
2583 * \param vt variable table
2584 * \param prog program to put GPU instructions into
2585 * \param pragmas controls codegen options
2586 * \param withEnd if true, emit END opcode at end
2587 * \param log log for emitting errors/warnings/info
2588 */
2589 GLboolean
2590 _slang_emit_code(slang_ir_node *n, slang_var_table *vt,
2591 struct gl_program *prog,
2592 const struct gl_sl_pragmas *pragmas,
2593 GLboolean withEnd,
2594 slang_info_log *log)
2595 {
2596 GET_CURRENT_CONTEXT(ctx);
2597 GLboolean success;
2598 slang_emit_info emitInfo;
2599 GLuint maxUniforms;
2600
2601 emitInfo.log = log;
2602 emitInfo.vt = vt;
2603 emitInfo.prog = prog;
2604 emitInfo.Subroutines = NULL;
2605 emitInfo.NumSubroutines = 0;
2606 emitInfo.MaxInstructions = prog->NumInstructions;
2607
2608 emitInfo.EmitHighLevelInstructions = ctx->Shader.EmitHighLevelInstructions;
2609 emitInfo.EmitCondCodes = ctx->Shader.EmitCondCodes;
2610 emitInfo.EmitComments = ctx->Shader.EmitComments || pragmas->Debug;
2611 emitInfo.EmitBeginEndSub = GL_TRUE;
2612
2613 if (!emitInfo.EmitCondCodes) {
2614 emitInfo.EmitHighLevelInstructions = GL_TRUE;
2615 }
2616
2617 /* Check uniform/constant limits */
2618 if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
2619 maxUniforms = ctx->Const.FragmentProgram.MaxUniformComponents / 4;
2620 }
2621 else {
2622 assert(prog->Target == GL_VERTEX_PROGRAM_ARB);
2623 maxUniforms = ctx->Const.VertexProgram.MaxUniformComponents / 4;
2624 }
2625 if (prog->Parameters->NumParameters > maxUniforms) {
2626 slang_info_log_error(log, "Constant/uniform register limit exceeded "
2627 "(max=%u vec4)", maxUniforms);
2628
2629 return GL_FALSE;
2630 }
2631
2632 (void) emit(&emitInfo, n);
2633
2634 /* finish up by adding the END opcode to program */
2635 if (withEnd) {
2636 struct prog_instruction *inst;
2637 inst = new_instruction(&emitInfo, OPCODE_END);
2638 if (!inst) {
2639 return GL_FALSE;
2640 }
2641 }
2642
2643 _slang_resolve_subroutines(&emitInfo);
2644
2645 success = GL_TRUE;
2646
2647 #if 0
2648 printf("*********** End emit code (%u inst):\n", prog->NumInstructions);
2649 _mesa_print_program(prog);
2650 _mesa_print_program_parameters(ctx,prog);
2651 #endif
2652
2653 return success;
2654 }