glsl: when debug pragma is on, emit comments about function calls/inlines
[mesa.git] / src / mesa / shader / slang / slang_emit.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2005-2008 Brian Paul All Rights Reserved.
5 * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file slang_emit.c
27 * Emit program instructions (PI code) from IR trees.
28 * \author Brian Paul
29 */
30
31 /***
32 *** NOTES
33 ***
34 *** To emit GPU instructions, we basically just do an in-order traversal
35 *** of the IR tree.
36 ***/
37
38
39 #include "main/imports.h"
40 #include "main/context.h"
41 #include "main/macros.h"
42 #include "shader/program.h"
43 #include "shader/prog_instruction.h"
44 #include "shader/prog_parameter.h"
45 #include "shader/prog_print.h"
46 #include "slang_builtin.h"
47 #include "slang_emit.h"
48 #include "slang_mem.h"
49
50
/** When nonzero, emit_arith() tries to fuse ADD(MUL(a, b), c) into one MAD. */
#define PEEPHOLE_OPTIMIZATIONS 1
/**
 * When nonzero, the bodies of storage_annotation() and
 * instruction_annotation() are compiled in; when zero both return NULL.
 */
#define ANNOTATE 0
53
54
/**
 * State threaded through all of the emit_*() functions in this file.
 */
typedef struct
{
   slang_info_log *log;              /**< destination for error messages */
   slang_var_table *vt;              /**< used to allocate/free temp registers */
   struct gl_program *prog;          /**< program that new instructions are appended to */
   struct gl_program **Subroutines;  /**< array of subroutine programs, grown by new_subroutine() */
   GLuint NumSubroutines;            /**< number of entries in Subroutines[] */

   GLuint MaxInstructions;  /**< size of prog->Instructions[] buffer */

   /* code-gen options */
   GLboolean EmitHighLevelInstructions;
   GLboolean EmitCondCodes;
   GLboolean EmitComments;
   GLboolean EmitBeginEndSub; /* XXX TEMPORARY */ /* emit_fcall() wraps bodies in BGNSUB/ENDSUB when set */
} slang_emit_info;
71
72
73
74 static struct gl_program *
75 new_subroutine(slang_emit_info *emitInfo, GLuint *id)
76 {
77 GET_CURRENT_CONTEXT(ctx);
78 const GLuint n = emitInfo->NumSubroutines;
79
80 emitInfo->Subroutines = (struct gl_program **)
81 _mesa_realloc(emitInfo->Subroutines,
82 n * sizeof(struct gl_program),
83 (n + 1) * sizeof(struct gl_program));
84 emitInfo->Subroutines[n] = ctx->Driver.NewProgram(ctx, emitInfo->prog->Target, 0);
85 emitInfo->Subroutines[n]->Parameters = emitInfo->prog->Parameters;
86 emitInfo->NumSubroutines++;
87 *id = n;
88 return emitInfo->Subroutines[n];
89 }
90
91
92 /**
93 * Convert a writemask to a swizzle. Used for testing cond codes because
94 * we only want to test the cond code component(s) that was set by the
95 * previous instruction.
96 */
97 static GLuint
98 writemask_to_swizzle(GLuint writemask)
99 {
100 if (writemask == WRITEMASK_X)
101 return SWIZZLE_XXXX;
102 if (writemask == WRITEMASK_Y)
103 return SWIZZLE_YYYY;
104 if (writemask == WRITEMASK_Z)
105 return SWIZZLE_ZZZZ;
106 if (writemask == WRITEMASK_W)
107 return SWIZZLE_WWWW;
108 return SWIZZLE_XYZW; /* shouldn't be hit */
109 }
110
111
112 /**
113 * Convert a swizzle mask to a writemask.
114 * Note that the slang_ir_storage->Swizzle field can represent either a
115 * swizzle mask or a writemask, depending on how it's used. For example,
116 * when we parse "direction.yz" alone, we don't know whether .yz is a
117 * writemask or a swizzle. In this case, we encode ".yz" in store->Swizzle
118 * as a swizzle mask (.yz?? actually). Later, if direction.yz is used as
119 * an R-value, we use store->Swizzle as-is. Otherwise, if direction.yz is
120 * used as an L-value, we convert it to a writemask.
121 */
122 static GLuint
123 swizzle_to_writemask(GLuint swizzle)
124 {
125 GLuint i, writemask = 0x0;
126 for (i = 0; i < 4; i++) {
127 GLuint swz = GET_SWZ(swizzle, i);
128 if (swz <= SWIZZLE_W) {
129 writemask |= (1 << swz);
130 }
131 }
132 return writemask;
133 }
134
135
136 /**
137 * Swizzle a swizzle (function composition).
138 * That is, return swz2(swz1), or said another way: swz1.szw2
139 * Example: swizzle_swizzle(".zwxx", ".xxyw") yields ".zzwx"
140 */
141 GLuint
142 _slang_swizzle_swizzle(GLuint swz1, GLuint swz2)
143 {
144 GLuint i, swz, s[4];
145 for (i = 0; i < 4; i++) {
146 GLuint c = GET_SWZ(swz2, i);
147 if (c <= SWIZZLE_W)
148 s[i] = GET_SWZ(swz1, c);
149 else
150 s[i] = c;
151 }
152 swz = MAKE_SWIZZLE4(s[0], s[1], s[2], s[3]);
153 return swz;
154 }
155
156
157 /**
158 * Return the default swizzle mask for accessing a variable of the
159 * given size (in floats). If size = 1, comp is used to identify
160 * which component [0..3] of the register holds the variable.
161 */
162 GLuint
163 _slang_var_swizzle(GLint size, GLint comp)
164 {
165 switch (size) {
166 case 1:
167 return MAKE_SWIZZLE4(comp, SWIZZLE_NIL, SWIZZLE_NIL, SWIZZLE_NIL);
168 case 2:
169 return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_NIL, SWIZZLE_NIL);
170 case 3:
171 return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_NIL);
172 default:
173 return SWIZZLE_XYZW;
174 }
175 }
176
177
178
179 /**
180 * Allocate storage for the given node (if it hasn't already been allocated).
181 *
182 * Typically this is temporary storage for an intermediate result (such as
183 * for a multiply or add, etc).
184 *
185 * If n->Store does not exist it will be created and will be of the size
186 * specified by defaultSize.
187 */
188 static GLboolean
189 alloc_node_storage(slang_emit_info *emitInfo, slang_ir_node *n,
190 GLint defaultSize)
191 {
192 assert(!n->Var);
193 if (!n->Store) {
194 assert(defaultSize > 0);
195 n->Store = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, defaultSize);
196 }
197
198 /* now allocate actual register(s). I.e. set n->Store->Index >= 0 */
199 if (n->Store->Index < 0) {
200 if (!_slang_alloc_temp(emitInfo->vt, n->Store)) {
201 slang_info_log_error(emitInfo->log,
202 "Ran out of registers, too many temporaries");
203 _slang_free(n->Store);
204 n->Store = NULL;
205 return GL_FALSE;
206 }
207 }
208 return GL_TRUE;
209 }
210
211
212 /**
213 * Free temporary storage, if n->Store is, in fact, temp storage.
214 * Otherwise, no-op.
215 */
216 static void
217 free_node_storage(slang_var_table *vt, slang_ir_node *n)
218 {
219 if (n->Store->File == PROGRAM_TEMPORARY &&
220 n->Store->Index >= 0 &&
221 n->Opcode != IR_SWIZZLE) {
222 if (_slang_is_temp(vt, n->Store)) {
223 _slang_free_temp(vt, n->Store);
224 n->Store->Index = -1;
225 n->Store = NULL; /* XXX this may not be needed */
226 }
227 }
228 }
229
230
231 /**
232 * Helper function to allocate a short-term temporary.
233 * Free it with _slang_free_temp().
234 */
235 static GLboolean
236 alloc_local_temp(slang_emit_info *emitInfo, slang_ir_storage *temp, GLint size)
237 {
238 assert(size >= 1);
239 assert(size <= 4);
240 _mesa_bzero(temp, sizeof(*temp));
241 temp->Size = size;
242 temp->File = PROGRAM_TEMPORARY;
243 temp->Index = -1;
244 return _slang_alloc_temp(emitInfo->vt, temp);
245 }
246
247
248 /**
249 * Remove any SWIZZLE_NIL terms from given swizzle mask.
250 * For a swizzle like .z??? generate .zzzz (replicate single component).
251 * Else, for .wx?? generate .wxzw (insert default component for the position).
252 */
253 static GLuint
254 fix_swizzle(GLuint swizzle)
255 {
256 GLuint c0 = GET_SWZ(swizzle, 0),
257 c1 = GET_SWZ(swizzle, 1),
258 c2 = GET_SWZ(swizzle, 2),
259 c3 = GET_SWZ(swizzle, 3);
260 if (c1 == SWIZZLE_NIL && c2 == SWIZZLE_NIL && c3 == SWIZZLE_NIL) {
261 /* smear first component across all positions */
262 c1 = c2 = c3 = c0;
263 }
264 else {
265 /* insert default swizzle components */
266 if (c0 == SWIZZLE_NIL)
267 c0 = SWIZZLE_X;
268 if (c1 == SWIZZLE_NIL)
269 c1 = SWIZZLE_Y;
270 if (c2 == SWIZZLE_NIL)
271 c2 = SWIZZLE_Z;
272 if (c3 == SWIZZLE_NIL)
273 c3 = SWIZZLE_W;
274 }
275 return MAKE_SWIZZLE4(c0, c1, c2, c3);
276 }
277
278
279
280 /**
281 * Convert IR storage to an instruction dst register.
282 */
283 static void
284 storage_to_dst_reg(struct prog_dst_register *dst, const slang_ir_storage *st)
285 {
286 const GLboolean relAddr = st->RelAddr;
287 const GLint size = st->Size;
288 GLint index = st->Index;
289 GLuint swizzle = st->Swizzle;
290
291 assert(index >= 0);
292 /* if this is storage relative to some parent storage, walk up the tree */
293 while (st->Parent) {
294 st = st->Parent;
295 assert(st->Index >= 0);
296 index += st->Index;
297 swizzle = _slang_swizzle_swizzle(st->Swizzle, swizzle);
298 }
299
300 assert(st->File != PROGRAM_UNDEFINED);
301 dst->File = st->File;
302
303 assert(index >= 0);
304 dst->Index = index;
305
306 assert(size >= 1);
307 assert(size <= 4);
308
309 if (swizzle != SWIZZLE_XYZW) {
310 dst->WriteMask = swizzle_to_writemask(swizzle);
311 }
312 else {
313 switch (size) {
314 case 1:
315 dst->WriteMask = WRITEMASK_X << GET_SWZ(st->Swizzle, 0);
316 break;
317 case 2:
318 dst->WriteMask = WRITEMASK_XY;
319 break;
320 case 3:
321 dst->WriteMask = WRITEMASK_XYZ;
322 break;
323 case 4:
324 dst->WriteMask = WRITEMASK_XYZW;
325 break;
326 default:
327 ; /* error would have been caught above */
328 }
329 }
330
331 dst->RelAddr = relAddr;
332 }
333
334
335 /**
336 * Convert IR storage to an instruction src register.
337 */
338 static void
339 storage_to_src_reg(struct prog_src_register *src, const slang_ir_storage *st)
340 {
341 const GLboolean relAddr = st->RelAddr;
342 GLint index = st->Index;
343 GLuint swizzle = st->Swizzle;
344
345 /* if this is storage relative to some parent storage, walk up the tree */
346 assert(index >= 0);
347 while (st->Parent) {
348 st = st->Parent;
349 if (st->Index < 0) {
350 /* an error should have been reported already */
351 return;
352 }
353 assert(st->Index >= 0);
354 index += st->Index;
355 swizzle = _slang_swizzle_swizzle(fix_swizzle(st->Swizzle), swizzle);
356 }
357
358 assert(st->File >= 0);
359 #if 1 /* XXX temporary */
360 if (st->File == PROGRAM_UNDEFINED) {
361 slang_ir_storage *st0 = (slang_ir_storage *) st;
362 st0->File = PROGRAM_TEMPORARY;
363 }
364 #endif
365 assert(st->File < PROGRAM_UNDEFINED);
366 src->File = st->File;
367
368 assert(index >= 0);
369 src->Index = index;
370
371 swizzle = fix_swizzle(swizzle);
372 assert(GET_SWZ(swizzle, 0) <= SWIZZLE_W);
373 assert(GET_SWZ(swizzle, 1) <= SWIZZLE_W);
374 assert(GET_SWZ(swizzle, 2) <= SWIZZLE_W);
375 assert(GET_SWZ(swizzle, 3) <= SWIZZLE_W);
376 src->Swizzle = swizzle;
377
378 src->RelAddr = relAddr;
379 }
380
381
382 /*
383 * Setup storage pointing to a scalar constant/literal.
384 */
385 static void
386 constant_to_storage(slang_emit_info *emitInfo,
387 GLfloat val,
388 slang_ir_storage *store)
389 {
390 GLuint swizzle;
391 GLint reg;
392 GLfloat value[4];
393
394 value[0] = val;
395 reg = _mesa_add_unnamed_constant(emitInfo->prog->Parameters,
396 value, 1, &swizzle);
397
398 memset(store, 0, sizeof(*store));
399 store->File = PROGRAM_CONSTANT;
400 store->Index = reg;
401 store->Swizzle = swizzle;
402 }
403
404
405 /**
406 * Add new instruction at end of given program.
407 * \param prog the program to append instruction onto
408 * \param opcode opcode for the new instruction
409 * \return pointer to the new instruction
410 */
411 static struct prog_instruction *
412 new_instruction(slang_emit_info *emitInfo, gl_inst_opcode opcode)
413 {
414 struct gl_program *prog = emitInfo->prog;
415 struct prog_instruction *inst;
416
417 #if 0
418 /* print prev inst */
419 if (prog->NumInstructions > 0) {
420 _mesa_print_instruction(prog->Instructions + prog->NumInstructions - 1);
421 }
422 #endif
423 assert(prog->NumInstructions <= emitInfo->MaxInstructions);
424
425 if (prog->NumInstructions == emitInfo->MaxInstructions) {
426 /* grow the instruction buffer */
427 emitInfo->MaxInstructions += 20;
428 prog->Instructions =
429 _mesa_realloc_instructions(prog->Instructions,
430 prog->NumInstructions,
431 emitInfo->MaxInstructions);
432 }
433
434 inst = prog->Instructions + prog->NumInstructions;
435 prog->NumInstructions++;
436 _mesa_init_instructions(inst, 1);
437 inst->Opcode = opcode;
438 inst->BranchTarget = -1; /* invalid */
439 /*
440 printf("New inst %d: %p %s\n", prog->NumInstructions-1,(void*)inst,
441 _mesa_opcode_string(inst->Opcode));
442 */
443 return inst;
444 }
445
446
447 static struct prog_instruction *
448 emit_arl_load(slang_emit_info *emitInfo,
449 gl_register_file file, GLint index, GLuint swizzle)
450 {
451 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_ARL);
452 inst->SrcReg[0].File = file;
453 inst->SrcReg[0].Index = index;
454 inst->SrcReg[0].Swizzle = fix_swizzle(swizzle);
455 inst->DstReg.File = PROGRAM_ADDRESS;
456 inst->DstReg.Index = 0;
457 inst->DstReg.WriteMask = WRITEMASK_X;
458 return inst;
459 }
460
461
/**
 * Emit a new instruction with given opcode, operands.
 * At this point the instruction may have multiple indirect register
 * loads/stores.  We convert those into ARL loads and address-relative
 * operands.  See comments inside.
 * At some point in the future we could directly emit indirectly addressed
 * registers in Mesa GPU instructions.
 *
 * \param emitInfo  emit state
 * \param opcode  opcode of the instruction to emit
 * \param dst  destination storage, or NULL if there is none
 * \param src0  first source storage, or NULL
 * \param src1  second source storage, or NULL
 * \param src2  third source storage, or NULL
 * \return the new instruction (the last one emitted; ARL/MOV helper
 *         instructions may precede it), or NULL if new_instruction()
 *         returned NULL
 */
static struct prog_instruction *
emit_instruction(slang_emit_info *emitInfo,
                 gl_inst_opcode opcode,
                 const slang_ir_storage *dst,
                 const slang_ir_storage *src0,
                 const slang_ir_storage *src1,
                 const slang_ir_storage *src2)
{
   struct prog_instruction *inst;
   GLuint numIndirect = 0;
   const slang_ir_storage *src[3];
   /* local copies of operands which get rewritten to be ARL-relative */
   slang_ir_storage newSrc[3], newDst;
   GLuint i;
   /* isTemp[i] is set if newSrc[i] holds a temp register allocated here */
   GLboolean isTemp[3];

   isTemp[0] = isTemp[1] = isTemp[2] = GL_FALSE;

   src[0] = src0;
   src[1] = src1;
   src[2] = src2;

   /* count up how many operands are indirect loads */
   for (i = 0; i < 3; i++) {
      if (src[i] && src[i]->IsIndirect)
         numIndirect++;
   }
   if (dst && dst->IsIndirect)
      numIndirect++;

   /* Take special steps for indirect register loads.
    * If we had multiple address registers this would be simpler.
    * For example, this GLSL code:
    *    x[i] = y[j] + z[k];
    * would translate into something like:
    *    ARL ADDR.x, i;
    *    ARL ADDR.y, j;
    *    ARL ADDR.z, k;
    *    ADD TEMP[ADDR.x+5], TEMP[ADDR.y+9], TEMP[ADDR.z+4];
    * But since we currently only have one address register we have to
    * load each indirect source into a temporary first (sources are
    * handled in order, the destination last):
    *    ARL ADDR.x, j;
    *    MOV t1, TEMP[ADDR.x+9];
    *    ARL ADDR.x, k;
    *    MOV t2, TEMP[ADDR.x+4];
    *    ARL ADDR.x, i;
    *    ADD TEMP[ADDR.x+5], t1, t2;
    * The code here figures this out...
    */
   if (numIndirect > 0) {
      for (i = 0; i < 3; i++) {
         if (src[i] && src[i]->IsIndirect) {
            /* load the ARL register with the indirect register */
            emit_arl_load(emitInfo,
                          src[i]->IndirectFile,
                          src[i]->IndirectIndex,
                          src[i]->IndirectSwizzle);

            if (numIndirect > 1) {
               /* Need to load src[i] into a temporary register */
               slang_ir_storage srcRelAddr;
               /* NOTE(review): the result of alloc_local_temp() is not
                * checked here.
                */
               alloc_local_temp(emitInfo, &newSrc[i], src[i]->Size);
               isTemp[i] = GL_TRUE;

               /* set RelAddr flag on src register */
               srcRelAddr = *src[i];
               srcRelAddr.RelAddr = GL_TRUE;
               srcRelAddr.IsIndirect = GL_FALSE; /* not really needed */

               /* MOV newSrc, srcRelAddr; */
               /* (recursive call; no operand is indirect any more) */
               inst = emit_instruction(emitInfo,
                                       OPCODE_MOV,
                                       &newSrc[i],
                                       &srcRelAddr,
                                       NULL,
                                       NULL);

               src[i] = &newSrc[i];
            }
            else {
               /* just rewrite the src[i] storage to be ARL-relative */
               newSrc[i] = *src[i];
               newSrc[i].RelAddr = GL_TRUE;
               newSrc[i].IsIndirect = GL_FALSE; /* not really needed */
               src[i] = &newSrc[i];
            }
         }
      }
   }

   /* Take special steps for indirect dest register write */
   if (dst && dst->IsIndirect) {
      /* load the ARL register with the indirect register */
      emit_arl_load(emitInfo,
                    dst->IndirectFile,
                    dst->IndirectIndex,
                    dst->IndirectSwizzle);
      newDst = *dst;
      newDst.RelAddr = GL_TRUE;
      newDst.IsIndirect = GL_FALSE;
      dst = &newDst;
   }

   /* OK, emit the instruction and its dst, src regs */
   inst = new_instruction(emitInfo, opcode);
   if (!inst)
      return NULL;

   if (dst)
      storage_to_dst_reg(&inst->DstReg, dst);

   for (i = 0; i < 3; i++) {
      if (src[i])
         storage_to_src_reg(&inst->SrcReg[i], src[i]);
   }

   /* Free any temp registers that we allocated above */
   for (i = 0; i < 3; i++) {
      if (isTemp[i])
         _slang_free_temp(emitInfo->vt, &newSrc[i]);
   }

   return inst;
}
592
593
594
595 /**
596 * Put a comment on the given instruction.
597 */
598 static void
599 inst_comment(struct prog_instruction *inst, const char *comment)
600 {
601 if (inst)
602 inst->Comment = _mesa_strdup(comment);
603 }
604
605
606
607 /**
608 * Return pointer to last instruction in program.
609 */
610 static struct prog_instruction *
611 prev_instruction(slang_emit_info *emitInfo)
612 {
613 struct gl_program *prog = emitInfo->prog;
614 if (prog->NumInstructions == 0)
615 return NULL;
616 else
617 return prog->Instructions + prog->NumInstructions - 1;
618 }
619
620
/*
 * Forward declaration: the emit_*() helpers below call emit() to generate
 * code for their child nodes.
 */
static struct prog_instruction *
emit(slang_emit_info *emitInfo, slang_ir_node *n);
623
624
625 /**
626 * Return an annotation string for given node's storage.
627 */
628 static char *
629 storage_annotation(const slang_ir_node *n, const struct gl_program *prog)
630 {
631 #if ANNOTATE
632 const slang_ir_storage *st = n->Store;
633 static char s[100] = "";
634
635 if (!st)
636 return _mesa_strdup("");
637
638 switch (st->File) {
639 case PROGRAM_CONSTANT:
640 if (st->Index >= 0) {
641 const GLfloat *val = prog->Parameters->ParameterValues[st->Index];
642 if (st->Swizzle == SWIZZLE_NOOP)
643 sprintf(s, "{%g, %g, %g, %g}", val[0], val[1], val[2], val[3]);
644 else {
645 sprintf(s, "%g", val[GET_SWZ(st->Swizzle, 0)]);
646 }
647 }
648 break;
649 case PROGRAM_TEMPORARY:
650 if (n->Var)
651 sprintf(s, "%s", (char *) n->Var->a_name);
652 else
653 sprintf(s, "t[%d]", st->Index);
654 break;
655 case PROGRAM_STATE_VAR:
656 case PROGRAM_UNIFORM:
657 sprintf(s, "%s", prog->Parameters->Parameters[st->Index].Name);
658 break;
659 case PROGRAM_VARYING:
660 sprintf(s, "%s", prog->Varying->Parameters[st->Index].Name);
661 break;
662 case PROGRAM_INPUT:
663 sprintf(s, "input[%d]", st->Index);
664 break;
665 case PROGRAM_OUTPUT:
666 sprintf(s, "output[%d]", st->Index);
667 break;
668 default:
669 s[0] = 0;
670 }
671 return _mesa_strdup(s);
672 #else
673 return NULL;
674 #endif
675 }
676
677
678 /**
679 * Return an annotation string for an instruction.
680 */
681 static char *
682 instruction_annotation(gl_inst_opcode opcode, char *dstAnnot,
683 char *srcAnnot0, char *srcAnnot1, char *srcAnnot2)
684 {
685 #if ANNOTATE
686 const char *operator;
687 char *s;
688 int len = 50;
689
690 if (dstAnnot)
691 len += strlen(dstAnnot);
692 else
693 dstAnnot = _mesa_strdup("");
694
695 if (srcAnnot0)
696 len += strlen(srcAnnot0);
697 else
698 srcAnnot0 = _mesa_strdup("");
699
700 if (srcAnnot1)
701 len += strlen(srcAnnot1);
702 else
703 srcAnnot1 = _mesa_strdup("");
704
705 if (srcAnnot2)
706 len += strlen(srcAnnot2);
707 else
708 srcAnnot2 = _mesa_strdup("");
709
710 switch (opcode) {
711 case OPCODE_ADD:
712 operator = "+";
713 break;
714 case OPCODE_SUB:
715 operator = "-";
716 break;
717 case OPCODE_MUL:
718 operator = "*";
719 break;
720 case OPCODE_DP2:
721 operator = "DP2";
722 break;
723 case OPCODE_DP3:
724 operator = "DP3";
725 break;
726 case OPCODE_DP4:
727 operator = "DP4";
728 break;
729 case OPCODE_XPD:
730 operator = "XPD";
731 break;
732 case OPCODE_RSQ:
733 operator = "RSQ";
734 break;
735 case OPCODE_SGT:
736 operator = ">";
737 break;
738 default:
739 operator = ",";
740 }
741
742 s = (char *) malloc(len);
743 sprintf(s, "%s = %s %s %s %s", dstAnnot,
744 srcAnnot0, operator, srcAnnot1, srcAnnot2);
745 assert(_mesa_strlen(s) < len);
746
747 free(dstAnnot);
748 free(srcAnnot0);
749 free(srcAnnot1);
750 free(srcAnnot2);
751
752 return s;
753 #else
754 return NULL;
755 #endif
756 }
757
758
759 /**
760 * Emit an instruction that's just a comment.
761 */
762 static struct prog_instruction *
763 emit_comment(slang_emit_info *emitInfo, const char *comment)
764 {
765 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_NOP);
766 inst_comment(inst, comment);
767 return inst;
768 }
769
770
771 /**
772 * Generate code for a simple arithmetic instruction.
773 * Either 1, 2 or 3 operands.
774 */
775 static struct prog_instruction *
776 emit_arith(slang_emit_info *emitInfo, slang_ir_node *n)
777 {
778 const slang_ir_info *info = _slang_ir_info(n->Opcode);
779 struct prog_instruction *inst;
780 GLuint i;
781
782 assert(info);
783 assert(info->InstOpcode != OPCODE_NOP);
784
785 #if PEEPHOLE_OPTIMIZATIONS
786 /* Look for MAD opportunity */
787 if (info->NumParams == 2 &&
788 n->Opcode == IR_ADD && n->Children[0]->Opcode == IR_MUL) {
789 /* found pattern IR_ADD(IR_MUL(A, B), C) */
790 emit(emitInfo, n->Children[0]->Children[0]); /* A */
791 emit(emitInfo, n->Children[0]->Children[1]); /* B */
792 emit(emitInfo, n->Children[1]); /* C */
793 alloc_node_storage(emitInfo, n, -1); /* dest */
794
795 inst = emit_instruction(emitInfo,
796 OPCODE_MAD,
797 n->Store,
798 n->Children[0]->Children[0]->Store,
799 n->Children[0]->Children[1]->Store,
800 n->Children[1]->Store);
801
802 free_node_storage(emitInfo->vt, n->Children[0]->Children[0]);
803 free_node_storage(emitInfo->vt, n->Children[0]->Children[1]);
804 free_node_storage(emitInfo->vt, n->Children[1]);
805 return inst;
806 }
807
808 if (info->NumParams == 2 &&
809 n->Opcode == IR_ADD && n->Children[1]->Opcode == IR_MUL) {
810 /* found pattern IR_ADD(A, IR_MUL(B, C)) */
811 emit(emitInfo, n->Children[0]); /* A */
812 emit(emitInfo, n->Children[1]->Children[0]); /* B */
813 emit(emitInfo, n->Children[1]->Children[1]); /* C */
814 alloc_node_storage(emitInfo, n, -1); /* dest */
815
816 inst = emit_instruction(emitInfo,
817 OPCODE_MAD,
818 n->Store,
819 n->Children[1]->Children[0]->Store,
820 n->Children[1]->Children[1]->Store,
821 n->Children[0]->Store);
822
823 free_node_storage(emitInfo->vt, n->Children[1]->Children[0]);
824 free_node_storage(emitInfo->vt, n->Children[1]->Children[1]);
825 free_node_storage(emitInfo->vt, n->Children[0]);
826 return inst;
827 }
828 #endif
829
830 /* gen code for children, may involve temp allocation */
831 for (i = 0; i < info->NumParams; i++) {
832 emit(emitInfo, n->Children[i]);
833 if (!n->Children[i] || !n->Children[i]->Store) {
834 /* error recovery */
835 return NULL;
836 }
837 }
838
839 /* result storage */
840 alloc_node_storage(emitInfo, n, -1);
841
842 inst = emit_instruction(emitInfo,
843 info->InstOpcode,
844 n->Store, /* dest */
845 (info->NumParams > 0 ? n->Children[0]->Store : NULL),
846 (info->NumParams > 1 ? n->Children[1]->Store : NULL),
847 (info->NumParams > 2 ? n->Children[2]->Store : NULL)
848 );
849
850 /* free temps */
851 for (i = 0; i < info->NumParams; i++)
852 free_node_storage(emitInfo->vt, n->Children[i]);
853
854 return inst;
855 }
856
857
858 /**
859 * Emit code for == and != operators. These could normally be handled
860 * by emit_arith() except we need to be able to handle structure comparisons.
861 */
862 static struct prog_instruction *
863 emit_compare(slang_emit_info *emitInfo, slang_ir_node *n)
864 {
865 struct prog_instruction *inst = NULL;
866 GLint size;
867
868 assert(n->Opcode == IR_EQUAL || n->Opcode == IR_NOTEQUAL);
869
870 /* gen code for children */
871 emit(emitInfo, n->Children[0]);
872 emit(emitInfo, n->Children[1]);
873
874 if (n->Children[0]->Store->Size != n->Children[1]->Store->Size) {
875 slang_info_log_error(emitInfo->log, "invalid operands to == or !=");
876 n->Store = NULL;
877 return NULL;
878 }
879
880 /* final result is 1 bool */
881 if (!alloc_node_storage(emitInfo, n, 1))
882 return NULL;
883
884 size = n->Children[0]->Store->Size;
885
886 if (size == 1) {
887 gl_inst_opcode opcode = n->Opcode == IR_EQUAL ? OPCODE_SEQ : OPCODE_SNE;
888 inst = emit_instruction(emitInfo,
889 opcode,
890 n->Store, /* dest */
891 n->Children[0]->Store,
892 n->Children[1]->Store,
893 NULL);
894 }
895 else if (size <= 4) {
896 /* compare two vectors.
897 * Unfortunately, there's no instruction to compare vectors and
898 * return a scalar result. Do it with some compare and dot product
899 * instructions...
900 */
901 GLuint swizzle;
902 gl_inst_opcode dotOp;
903 slang_ir_storage tempStore;
904
905 if (!alloc_local_temp(emitInfo, &tempStore, 4)) {
906 n->Store = NULL;
907 return NULL;
908 /* out of temps */
909 }
910
911 if (size == 4) {
912 dotOp = OPCODE_DP4;
913 swizzle = SWIZZLE_XYZW;
914 }
915 else if (size == 3) {
916 dotOp = OPCODE_DP3;
917 swizzle = SWIZZLE_XYZW;
918 }
919 else {
920 assert(size == 2);
921 dotOp = OPCODE_DP3; /* XXX use OPCODE_DP2 eventually */
922 swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y);
923 }
924
925 /* Compute inequality (temp = (A != B)) */
926 inst = emit_instruction(emitInfo,
927 OPCODE_SNE,
928 &tempStore,
929 n->Children[0]->Store,
930 n->Children[1]->Store,
931 NULL);
932 inst_comment(inst, "Compare values");
933
934 /* Compute val = DOT(temp, temp) (reduction) */
935 inst = emit_instruction(emitInfo,
936 dotOp,
937 n->Store,
938 &tempStore,
939 &tempStore,
940 NULL);
941 inst->SrcReg[0].Swizzle = inst->SrcReg[1].Swizzle = swizzle; /*override*/
942 inst_comment(inst, "Reduce vec to bool");
943
944 _slang_free_temp(emitInfo->vt, &tempStore); /* free temp */
945
946 if (n->Opcode == IR_EQUAL) {
947 /* compute val = !val.x with SEQ val, val, 0; */
948 slang_ir_storage zero;
949 constant_to_storage(emitInfo, 0.0, &zero);
950 inst = emit_instruction(emitInfo,
951 OPCODE_SEQ,
952 n->Store, /* dest */
953 n->Store,
954 &zero,
955 NULL);
956 inst_comment(inst, "Invert true/false");
957 }
958 }
959 else {
960 /* size > 4, struct or array compare.
961 * XXX this won't work reliably for structs with padding!!
962 */
963 GLint i, num = (n->Children[0]->Store->Size + 3) / 4;
964 slang_ir_storage accTemp, sneTemp;
965
966 if (!alloc_local_temp(emitInfo, &accTemp, 4))
967 return NULL;
968
969 if (!alloc_local_temp(emitInfo, &sneTemp, 4))
970 return NULL;
971
972 for (i = 0; i < num; i++) {
973 slang_ir_storage srcStore0 = *n->Children[0]->Store;
974 slang_ir_storage srcStore1 = *n->Children[1]->Store;
975 srcStore0.Index += i;
976 srcStore1.Index += i;
977
978 if (i == 0) {
979 /* SNE accTemp, left[i], right[i] */
980 inst = emit_instruction(emitInfo, OPCODE_SNE,
981 &accTemp, /* dest */
982 &srcStore0,
983 &srcStore1,
984 NULL);
985 inst_comment(inst, "Begin struct/array comparison");
986 }
987 else {
988 /* SNE sneTemp, left[i], right[i] */
989 inst = emit_instruction(emitInfo, OPCODE_SNE,
990 &sneTemp, /* dest */
991 &srcStore0,
992 &srcStore1,
993 NULL);
994 /* ADD accTemp, accTemp, sneTemp; # like logical-OR */
995 inst = emit_instruction(emitInfo, OPCODE_ADD,
996 &accTemp, /* dest */
997 &accTemp,
998 &sneTemp,
999 NULL);
1000 }
1001 }
1002
1003 /* compute accTemp.x || accTemp.y || accTemp.z || accTemp.w with DOT4 */
1004 inst = emit_instruction(emitInfo, OPCODE_DP4,
1005 n->Store,
1006 &accTemp,
1007 &accTemp,
1008 NULL);
1009 inst_comment(inst, "End struct/array comparison");
1010
1011 if (n->Opcode == IR_EQUAL) {
1012 /* compute tmp.x = !tmp.x via tmp.x = (tmp.x == 0) */
1013 slang_ir_storage zero;
1014 constant_to_storage(emitInfo, 0.0, &zero);
1015 inst = emit_instruction(emitInfo, OPCODE_SEQ,
1016 n->Store, /* dest */
1017 n->Store,
1018 &zero,
1019 NULL);
1020 inst_comment(inst, "Invert true/false");
1021 }
1022
1023 _slang_free_temp(emitInfo->vt, &accTemp);
1024 _slang_free_temp(emitInfo->vt, &sneTemp);
1025 }
1026
1027 /* free temps */
1028 free_node_storage(emitInfo->vt, n->Children[0]);
1029 free_node_storage(emitInfo->vt, n->Children[1]);
1030
1031 return inst;
1032 }
1033
1034
1035
1036 /**
1037 * Generate code for an IR_CLAMP instruction.
1038 */
1039 static struct prog_instruction *
1040 emit_clamp(slang_emit_info *emitInfo, slang_ir_node *n)
1041 {
1042 struct prog_instruction *inst;
1043 slang_ir_node tmpNode;
1044
1045 assert(n->Opcode == IR_CLAMP);
1046 /* ch[0] = value
1047 * ch[1] = min limit
1048 * ch[2] = max limit
1049 */
1050
1051 inst = emit(emitInfo, n->Children[0]);
1052
1053 /* If lower limit == 0.0 and upper limit == 1.0,
1054 * set prev instruction's SaturateMode field to SATURATE_ZERO_ONE.
1055 * Else,
1056 * emit OPCODE_MIN, OPCODE_MAX sequence.
1057 */
1058 #if 0
1059 /* XXX this isn't quite finished yet */
1060 if (n->Children[1]->Opcode == IR_FLOAT &&
1061 n->Children[1]->Value[0] == 0.0 &&
1062 n->Children[1]->Value[1] == 0.0 &&
1063 n->Children[1]->Value[2] == 0.0 &&
1064 n->Children[1]->Value[3] == 0.0 &&
1065 n->Children[2]->Opcode == IR_FLOAT &&
1066 n->Children[2]->Value[0] == 1.0 &&
1067 n->Children[2]->Value[1] == 1.0 &&
1068 n->Children[2]->Value[2] == 1.0 &&
1069 n->Children[2]->Value[3] == 1.0) {
1070 if (!inst) {
1071 inst = prev_instruction(prog);
1072 }
1073 if (inst && inst->Opcode != OPCODE_NOP) {
1074 /* and prev instruction's DstReg matches n->Children[0]->Store */
1075 inst->SaturateMode = SATURATE_ZERO_ONE;
1076 n->Store = n->Children[0]->Store;
1077 return inst;
1078 }
1079 }
1080 #endif
1081
1082 if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size))
1083 return NULL;
1084
1085 emit(emitInfo, n->Children[1]);
1086 emit(emitInfo, n->Children[2]);
1087
1088 /* Some GPUs don't allow reading from output registers. So if the
1089 * dest for this clamp() is an output reg, we can't use that reg for
1090 * the intermediate result. Use a temp register instead.
1091 */
1092 _mesa_bzero(&tmpNode, sizeof(tmpNode));
1093 alloc_node_storage(emitInfo, &tmpNode, n->Store->Size);
1094
1095 /* tmp = max(ch[0], ch[1]) */
1096 inst = emit_instruction(emitInfo, OPCODE_MAX,
1097 tmpNode.Store, /* dest */
1098 n->Children[0]->Store,
1099 n->Children[1]->Store,
1100 NULL);
1101
1102 /* n->dest = min(tmp, ch[2]) */
1103 inst = emit_instruction(emitInfo, OPCODE_MIN,
1104 n->Store, /* dest */
1105 tmpNode.Store,
1106 n->Children[2]->Store,
1107 NULL);
1108
1109 free_node_storage(emitInfo->vt, &tmpNode);
1110
1111 return inst;
1112 }
1113
1114
1115 static struct prog_instruction *
1116 emit_negation(slang_emit_info *emitInfo, slang_ir_node *n)
1117 {
1118 /* Implement as MOV dst, -src; */
1119 /* XXX we could look at the previous instruction and in some circumstances
1120 * modify it to accomplish the negation.
1121 */
1122 struct prog_instruction *inst;
1123
1124 emit(emitInfo, n->Children[0]);
1125
1126 if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size))
1127 return NULL;
1128
1129 inst = emit_instruction(emitInfo,
1130 OPCODE_MOV,
1131 n->Store, /* dest */
1132 n->Children[0]->Store,
1133 NULL,
1134 NULL);
1135 inst->SrcReg[0].NegateBase = NEGATE_XYZW;
1136 return inst;
1137 }
1138
1139
1140 static struct prog_instruction *
1141 emit_label(slang_emit_info *emitInfo, const slang_ir_node *n)
1142 {
1143 assert(n->Label);
1144 #if 0
1145 /* XXX this fails in loop tail code - investigate someday */
1146 assert(_slang_label_get_location(n->Label) < 0);
1147 _slang_label_set_location(n->Label, emitInfo->prog->NumInstructions,
1148 emitInfo->prog);
1149 #else
1150 if (_slang_label_get_location(n->Label) < 0)
1151 _slang_label_set_location(n->Label, emitInfo->prog->NumInstructions,
1152 emitInfo->prog);
1153 #endif
1154 return NULL;
1155 }
1156
1157
/**
 * Emit code for a function call.
 * Note that for each time a function is called, we emit the function's
 * body code again because the set of available registers may be different.
 *
 * The body is emitted into a new subroutine program (see new_subroutine())
 * while the current program is set aside; afterwards a CAL instruction is
 * appended to the current program.
 *
 * \param emitInfo  emit state
 * \param n  the IR_CALL node; n->Children[0] is the function body and
 *           n->Label names the function
 * \return the CAL instruction
 */
static struct prog_instruction *
emit_fcall(slang_emit_info *emitInfo, slang_ir_node *n)
{
   struct gl_program *progSave;
   struct prog_instruction *inst;
   GLuint subroutineId;
   GLuint maxInstSave;

   assert(n->Opcode == IR_CALL);
   assert(n->Label);

   /* save/push cur program */
   maxInstSave = emitInfo->MaxInstructions;
   progSave = emitInfo->prog;

   /* from here on, new_instruction() appends to the subroutine */
   emitInfo->prog = new_subroutine(emitInfo, &subroutineId);
   /* the buffer size equals the instruction count, so the first
    * new_instruction() call will grow the subroutine's buffer
    */
   emitInfo->MaxInstructions = emitInfo->prog->NumInstructions;

   _slang_label_set_location(n->Label, emitInfo->prog->NumInstructions,
                             emitInfo->prog);

   if (emitInfo->EmitBeginEndSub) {
      /* BGNSUB isn't a real instruction.
       * We require a label (i.e. "foobar:") though, if we're going to
       * print the program in the NV format.  The BGNSUB instruction is
       * really just a NOP to attach the label to.
       */
      inst = new_instruction(emitInfo, OPCODE_BGNSUB);
      inst_comment(inst, n->Label->Name);
   }

   /* body of function: */
   emit(emitInfo, n->Children[0]);
   /* the call's result lives wherever the body left it */
   n->Store = n->Children[0]->Store;

   /* add RET instruction now, if needed */
   /* NOTE(review): if the subroutine has no instructions at all at this
    * point, prev_instruction() returns NULL and no RET is added - confirm
    * that this is intended.
    */
   inst = prev_instruction(emitInfo);
   if (inst && inst->Opcode != OPCODE_RET) {
      inst = new_instruction(emitInfo, OPCODE_RET);
   }

   if (emitInfo->EmitBeginEndSub) {
      inst = new_instruction(emitInfo, OPCODE_ENDSUB);
      inst_comment(inst, n->Label->Name);
   }

   /* pop/restore cur program */
   emitInfo->prog = progSave;
   emitInfo->MaxInstructions = maxInstSave;

   /* emit the function call */
   inst = new_instruction(emitInfo, OPCODE_CAL);
   /* The branch target is just the subroutine number (changed later) */
   inst->BranchTarget = subroutineId;
   inst_comment(inst, n->Label->Name);
   assert(inst->BranchTarget >= 0);

   return inst;
}
1222
1223
1224 /**
1225 * Emit code for a 'return' statement.
1226 */
1227 static struct prog_instruction *
1228 emit_return(slang_emit_info *emitInfo, slang_ir_node *n)
1229 {
1230 struct prog_instruction *inst;
1231 assert(n);
1232 assert(n->Opcode == IR_RETURN);
1233 assert(n->Label);
1234 inst = new_instruction(emitInfo, OPCODE_RET);
1235 inst->DstReg.CondMask = COND_TR; /* always return */
1236 return inst;
1237 }
1238
1239
1240 static struct prog_instruction *
1241 emit_kill(slang_emit_info *emitInfo)
1242 {
1243 struct gl_fragment_program *fp;
1244 struct prog_instruction *inst;
1245 /* NV-KILL - discard fragment depending on condition code.
1246 * Note that ARB-KILL depends on sign of vector operand.
1247 */
1248 inst = new_instruction(emitInfo, OPCODE_KIL_NV);
1249 inst->DstReg.CondMask = COND_TR; /* always kill */
1250
1251 assert(emitInfo->prog->Target == GL_FRAGMENT_PROGRAM_ARB);
1252 fp = (struct gl_fragment_program *) emitInfo->prog;
1253 fp->UsesKill = GL_TRUE;
1254
1255 return inst;
1256 }
1257
1258
1259 static struct prog_instruction *
1260 emit_tex(slang_emit_info *emitInfo, slang_ir_node *n)
1261 {
1262 struct prog_instruction *inst;
1263 gl_inst_opcode opcode;
1264 GLboolean shadow = GL_FALSE;
1265
1266 switch (n->Opcode) {
1267 case IR_TEX:
1268 opcode = OPCODE_TEX;
1269 break;
1270 case IR_TEX_SH:
1271 opcode = OPCODE_TEX;
1272 shadow = GL_TRUE;
1273 break;
1274 case IR_TEXB:
1275 opcode = OPCODE_TXB;
1276 break;
1277 case IR_TEXB_SH:
1278 opcode = OPCODE_TXB;
1279 shadow = GL_TRUE;
1280 break;
1281 case IR_TEXP:
1282 opcode = OPCODE_TXP;
1283 break;
1284 case IR_TEXP_SH:
1285 opcode = OPCODE_TXP;
1286 shadow = GL_TRUE;
1287 break;
1288 default:
1289 _mesa_problem(NULL, "Bad IR TEX code");
1290 return NULL;
1291 }
1292
1293 if (n->Children[0]->Opcode == IR_ELEMENT) {
1294 /* array is the sampler (a uniform which'll indicate the texture unit) */
1295 assert(n->Children[0]->Children[0]->Store);
1296 assert(n->Children[0]->Children[0]->Store->File == PROGRAM_SAMPLER);
1297
1298 emit(emitInfo, n->Children[0]);
1299
1300 n->Children[0]->Var = n->Children[0]->Children[0]->Var;
1301 } else {
1302 /* this is the sampler (a uniform which'll indicate the texture unit) */
1303 assert(n->Children[0]->Store);
1304 assert(n->Children[0]->Store->File == PROGRAM_SAMPLER);
1305 }
1306
1307 /* emit code for the texcoord operand */
1308 (void) emit(emitInfo, n->Children[1]);
1309
1310 /* alloc storage for result of texture fetch */
1311 if (!alloc_node_storage(emitInfo, n, 4))
1312 return NULL;
1313
1314 /* emit TEX instruction; Child[1] is the texcoord */
1315 inst = emit_instruction(emitInfo,
1316 opcode,
1317 n->Store,
1318 n->Children[1]->Store,
1319 NULL,
1320 NULL);
1321
1322 inst->TexShadow = shadow;
1323
1324 /* Store->Index is the uniform/sampler index */
1325 assert(n->Children[0]->Store->Index >= 0);
1326 inst->TexSrcUnit = n->Children[0]->Store->Index;
1327 inst->TexSrcTarget = n->Children[0]->Store->TexTarget;
1328
1329 /* mark the sampler as being used */
1330 _mesa_use_uniform(emitInfo->prog->Parameters,
1331 (char *) n->Children[0]->Var->a_name);
1332
1333 return inst;
1334 }
1335
1336
1337 /**
1338 * Assignment/copy
1339 */
1340 static struct prog_instruction *
1341 emit_copy(slang_emit_info *emitInfo, slang_ir_node *n)
1342 {
1343 struct prog_instruction *inst;
1344
1345 assert(n->Opcode == IR_COPY);
1346
1347 /* lhs */
1348 emit(emitInfo, n->Children[0]);
1349 if (!n->Children[0]->Store || n->Children[0]->Store->Index < 0) {
1350 /* an error should have been already recorded */
1351 return NULL;
1352 }
1353
1354 /* rhs */
1355 assert(n->Children[1]);
1356 inst = emit(emitInfo, n->Children[1]);
1357
1358 if (!n->Children[1]->Store || n->Children[1]->Store->Index < 0) {
1359 if (!emitInfo->log->text) {
1360 slang_info_log_error(emitInfo->log, "invalid assignment");
1361 }
1362 return NULL;
1363 }
1364
1365 assert(n->Children[1]->Store->Index >= 0);
1366
1367 /*assert(n->Children[0]->Store->Size == n->Children[1]->Store->Size);*/
1368
1369 n->Store = n->Children[0]->Store;
1370
1371 if (n->Store->File == PROGRAM_SAMPLER) {
1372 /* no code generated for sampler assignments,
1373 * just copy the sampler index/target at compile time.
1374 */
1375 n->Store->Index = n->Children[1]->Store->Index;
1376 n->Store->TexTarget = n->Children[1]->Store->TexTarget;
1377 return NULL;
1378 }
1379
1380 #if PEEPHOLE_OPTIMIZATIONS
1381 if (inst &&
1382 (n->Children[1]->Opcode != IR_SWIZZLE) &&
1383 _slang_is_temp(emitInfo->vt, n->Children[1]->Store) &&
1384 (inst->DstReg.File == n->Children[1]->Store->File) &&
1385 (inst->DstReg.Index == n->Children[1]->Store->Index) &&
1386 !n->Children[0]->Store->IsIndirect &&
1387 n->Children[0]->Store->Size <= 4) {
1388 /* Peephole optimization:
1389 * The Right-Hand-Side has its results in a temporary place.
1390 * Modify the RHS (and the prev instruction) to store its results
1391 * in the destination specified by n->Children[0].
1392 * Then, this MOVE is a no-op.
1393 * Ex:
1394 * MUL tmp, x, y;
1395 * MOV a, tmp;
1396 * becomes:
1397 * MUL a, x, y;
1398 */
1399
1400 /* fixup the previous instruction (which stored the RHS result) */
1401 assert(n->Children[0]->Store->Index >= 0);
1402 storage_to_dst_reg(&inst->DstReg, n->Children[0]->Store);
1403 return inst;
1404 }
1405 else
1406 #endif
1407 {
1408 if (n->Children[0]->Store->Size > 4) {
1409 /* move matrix/struct etc (block of registers) */
1410 slang_ir_storage dstStore = *n->Children[0]->Store;
1411 slang_ir_storage srcStore = *n->Children[1]->Store;
1412 GLint size = srcStore.Size;
1413 ASSERT(n->Children[1]->Store->Swizzle == SWIZZLE_NOOP);
1414 dstStore.Size = 4;
1415 srcStore.Size = 4;
1416 while (size >= 4) {
1417 inst = emit_instruction(emitInfo, OPCODE_MOV,
1418 &dstStore,
1419 &srcStore,
1420 NULL,
1421 NULL);
1422 inst_comment(inst, "IR_COPY block");
1423 srcStore.Index++;
1424 dstStore.Index++;
1425 size -= 4;
1426 }
1427 }
1428 else {
1429 /* single register move */
1430 char *srcAnnot, *dstAnnot;
1431 assert(n->Children[0]->Store->Index >= 0);
1432 inst = emit_instruction(emitInfo, OPCODE_MOV,
1433 n->Children[0]->Store, /* dest */
1434 n->Children[1]->Store,
1435 NULL,
1436 NULL);
1437 dstAnnot = storage_annotation(n->Children[0], emitInfo->prog);
1438 srcAnnot = storage_annotation(n->Children[1], emitInfo->prog);
1439 inst->Comment = instruction_annotation(inst->Opcode, dstAnnot,
1440 srcAnnot, NULL, NULL);
1441 }
1442 free_node_storage(emitInfo->vt, n->Children[1]);
1443 return inst;
1444 }
1445 }
1446
1447
1448 /**
1449 * An IR_COND node wraps a boolean expression which is used by an
1450 * IF or WHILE test. This is where we'll set condition codes, if needed.
1451 */
1452 static struct prog_instruction *
1453 emit_cond(slang_emit_info *emitInfo, slang_ir_node *n)
1454 {
1455 struct prog_instruction *inst;
1456
1457 assert(n->Opcode == IR_COND);
1458
1459 if (!n->Children[0])
1460 return NULL;
1461
1462 /* emit code for the expression */
1463 inst = emit(emitInfo, n->Children[0]);
1464
1465 if (!n->Children[0]->Store) {
1466 /* error recovery */
1467 return NULL;
1468 }
1469
1470 assert(n->Children[0]->Store);
1471 /*assert(n->Children[0]->Store->Size == 1);*/
1472
1473 if (emitInfo->EmitCondCodes) {
1474 if (inst &&
1475 n->Children[0]->Store &&
1476 inst->DstReg.File == n->Children[0]->Store->File &&
1477 inst->DstReg.Index == n->Children[0]->Store->Index) {
1478 /* The previous instruction wrote to the register who's value
1479 * we're testing. Just fix that instruction so that the
1480 * condition codes are computed.
1481 */
1482 inst->CondUpdate = GL_TRUE;
1483 n->Store = n->Children[0]->Store;
1484 return inst;
1485 }
1486 else {
1487 /* This'll happen for things like "if (i) ..." where no code
1488 * is normally generated for the expression "i".
1489 * Generate a move instruction just to set condition codes.
1490 */
1491 if (!alloc_node_storage(emitInfo, n, 1))
1492 return NULL;
1493 inst = emit_instruction(emitInfo, OPCODE_MOV,
1494 n->Store, /* dest */
1495 n->Children[0]->Store,
1496 NULL,
1497 NULL);
1498 inst->CondUpdate = GL_TRUE;
1499 inst_comment(inst, "COND expr");
1500 _slang_free_temp(emitInfo->vt, n->Store);
1501 return inst;
1502 }
1503 }
1504 else {
1505 /* No-op: the boolean result of the expression is in a regular reg */
1506 n->Store = n->Children[0]->Store;
1507 return inst;
1508 }
1509 }
1510
1511
1512 /**
1513 * Logical-NOT
1514 */
1515 static struct prog_instruction *
1516 emit_not(slang_emit_info *emitInfo, slang_ir_node *n)
1517 {
1518 static const struct {
1519 gl_inst_opcode op, opNot;
1520 } operators[] = {
1521 { OPCODE_SLT, OPCODE_SGE },
1522 { OPCODE_SLE, OPCODE_SGT },
1523 { OPCODE_SGT, OPCODE_SLE },
1524 { OPCODE_SGE, OPCODE_SLT },
1525 { OPCODE_SEQ, OPCODE_SNE },
1526 { OPCODE_SNE, OPCODE_SEQ },
1527 { 0, 0 }
1528 };
1529 struct prog_instruction *inst;
1530 slang_ir_storage zero;
1531 GLuint i;
1532
1533 /* child expr */
1534 inst = emit(emitInfo, n->Children[0]);
1535
1536 #if PEEPHOLE_OPTIMIZATIONS
1537 if (inst) {
1538 /* if the prev instruction was a comparison instruction, invert it */
1539 for (i = 0; operators[i].op; i++) {
1540 if (inst->Opcode == operators[i].op) {
1541 inst->Opcode = operators[i].opNot;
1542 n->Store = n->Children[0]->Store;
1543 return inst;
1544 }
1545 }
1546 }
1547 #endif
1548
1549 /* else, invert using SEQ (v = v == 0) */
1550 if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size))
1551 return NULL;
1552
1553 constant_to_storage(emitInfo, 0.0, &zero);
1554 inst = emit_instruction(emitInfo,
1555 OPCODE_SEQ,
1556 n->Store,
1557 n->Children[0]->Store,
1558 &zero,
1559 NULL);
1560 inst_comment(inst, "NOT");
1561
1562 free_node_storage(emitInfo->vt, n->Children[0]);
1563
1564 return inst;
1565 }
1566
1567
1568 static struct prog_instruction *
1569 emit_if(slang_emit_info *emitInfo, slang_ir_node *n)
1570 {
1571 struct gl_program *prog = emitInfo->prog;
1572 GLuint ifInstLoc, elseInstLoc = 0;
1573 GLuint condWritemask = 0;
1574
1575 /* emit condition expression code */
1576 {
1577 struct prog_instruction *inst;
1578 inst = emit(emitInfo, n->Children[0]);
1579 if (emitInfo->EmitCondCodes) {
1580 if (!inst) {
1581 /* error recovery */
1582 return NULL;
1583 }
1584 condWritemask = inst->DstReg.WriteMask;
1585 }
1586 }
1587
1588 if (!n->Children[0]->Store)
1589 return NULL;
1590
1591 #if 0
1592 assert(n->Children[0]->Store->Size == 1); /* a bool! */
1593 #endif
1594
1595 ifInstLoc = prog->NumInstructions;
1596 if (emitInfo->EmitHighLevelInstructions) {
1597 if (emitInfo->EmitCondCodes) {
1598 /* IF condcode THEN ... */
1599 struct prog_instruction *ifInst;
1600 ifInst = new_instruction(emitInfo, OPCODE_IF);
1601 ifInst->DstReg.CondMask = COND_NE; /* if cond is non-zero */
1602 /* only test the cond code (1 of 4) that was updated by the
1603 * previous instruction.
1604 */
1605 ifInst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1606 }
1607 else {
1608 /* IF src[0] THEN ... */
1609 emit_instruction(emitInfo, OPCODE_IF,
1610 NULL, /* dst */
1611 n->Children[0]->Store, /* op0 */
1612 NULL,
1613 NULL);
1614 }
1615 }
1616 else {
1617 /* conditional jump to else, or endif */
1618 struct prog_instruction *ifInst = new_instruction(emitInfo, OPCODE_BRA);
1619 ifInst->DstReg.CondMask = COND_EQ; /* BRA if cond is zero */
1620 inst_comment(ifInst, "if zero");
1621 ifInst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1622 }
1623
1624 /* if body */
1625 emit(emitInfo, n->Children[1]);
1626
1627 if (n->Children[2]) {
1628 /* have else body */
1629 elseInstLoc = prog->NumInstructions;
1630 if (emitInfo->EmitHighLevelInstructions) {
1631 (void) new_instruction(emitInfo, OPCODE_ELSE);
1632 }
1633 else {
1634 /* jump to endif instruction */
1635 struct prog_instruction *inst;
1636 inst = new_instruction(emitInfo, OPCODE_BRA);
1637 inst_comment(inst, "else");
1638 inst->DstReg.CondMask = COND_TR; /* always branch */
1639 }
1640 prog->Instructions[ifInstLoc].BranchTarget = prog->NumInstructions;
1641 emit(emitInfo, n->Children[2]);
1642 }
1643 else {
1644 /* no else body */
1645 prog->Instructions[ifInstLoc].BranchTarget = prog->NumInstructions;
1646 }
1647
1648 if (emitInfo->EmitHighLevelInstructions) {
1649 (void) new_instruction(emitInfo, OPCODE_ENDIF);
1650 }
1651
1652 if (n->Children[2]) {
1653 prog->Instructions[elseInstLoc].BranchTarget = prog->NumInstructions;
1654 }
1655 return NULL;
1656 }
1657
1658
1659 static struct prog_instruction *
1660 emit_loop(slang_emit_info *emitInfo, slang_ir_node *n)
1661 {
1662 struct gl_program *prog = emitInfo->prog;
1663 struct prog_instruction *endInst;
1664 GLuint beginInstLoc, tailInstLoc, endInstLoc;
1665 slang_ir_node *ir;
1666
1667 /* emit OPCODE_BGNLOOP */
1668 beginInstLoc = prog->NumInstructions;
1669 if (emitInfo->EmitHighLevelInstructions) {
1670 (void) new_instruction(emitInfo, OPCODE_BGNLOOP);
1671 }
1672
1673 /* body */
1674 emit(emitInfo, n->Children[0]);
1675
1676 /* tail */
1677 tailInstLoc = prog->NumInstructions;
1678 if (n->Children[1]) {
1679 if (emitInfo->EmitComments)
1680 emit_comment(emitInfo, "Loop tail code:");
1681 emit(emitInfo, n->Children[1]);
1682 }
1683
1684 endInstLoc = prog->NumInstructions;
1685 if (emitInfo->EmitHighLevelInstructions) {
1686 /* emit OPCODE_ENDLOOP */
1687 endInst = new_instruction(emitInfo, OPCODE_ENDLOOP);
1688 }
1689 else {
1690 /* emit unconditional BRA-nch */
1691 endInst = new_instruction(emitInfo, OPCODE_BRA);
1692 endInst->DstReg.CondMask = COND_TR; /* always true */
1693 }
1694 /* ENDLOOP's BranchTarget points to the BGNLOOP inst */
1695 endInst->BranchTarget = beginInstLoc;
1696
1697 if (emitInfo->EmitHighLevelInstructions) {
1698 /* BGNLOOP's BranchTarget points to the ENDLOOP inst */
1699 prog->Instructions[beginInstLoc].BranchTarget = prog->NumInstructions -1;
1700 }
1701
1702 /* Done emitting loop code. Now walk over the loop's linked list of
1703 * BREAK and CONT nodes, filling in their BranchTarget fields (which
1704 * will point to the ENDLOOP+1 or BGNLOOP instructions, respectively).
1705 */
1706 for (ir = n->List; ir; ir = ir->List) {
1707 struct prog_instruction *inst = prog->Instructions + ir->InstLocation;
1708 assert(inst->BranchTarget < 0);
1709 if (ir->Opcode == IR_BREAK ||
1710 ir->Opcode == IR_BREAK_IF_TRUE) {
1711 assert(inst->Opcode == OPCODE_BRK ||
1712 inst->Opcode == OPCODE_BRA);
1713 /* go to instruction after end of loop */
1714 inst->BranchTarget = endInstLoc + 1;
1715 }
1716 else {
1717 assert(ir->Opcode == IR_CONT ||
1718 ir->Opcode == IR_CONT_IF_TRUE);
1719 assert(inst->Opcode == OPCODE_CONT ||
1720 inst->Opcode == OPCODE_BRA);
1721 /* go to instruction at tail of loop */
1722 inst->BranchTarget = endInstLoc;
1723 }
1724 }
1725 return NULL;
1726 }
1727
1728
1729 /**
1730 * Unconditional "continue" or "break" statement.
1731 * Either OPCODE_CONT, OPCODE_BRK or OPCODE_BRA will be emitted.
1732 */
1733 static struct prog_instruction *
1734 emit_cont_break(slang_emit_info *emitInfo, slang_ir_node *n)
1735 {
1736 gl_inst_opcode opcode;
1737 struct prog_instruction *inst;
1738
1739 if (n->Opcode == IR_CONT) {
1740 /* we need to execute the loop's tail code before doing CONT */
1741 assert(n->Parent);
1742 assert(n->Parent->Opcode == IR_LOOP);
1743 if (n->Parent->Children[1]) {
1744 /* emit tail code */
1745 if (emitInfo->EmitComments) {
1746 emit_comment(emitInfo, "continue - tail code:");
1747 }
1748 emit(emitInfo, n->Parent->Children[1]);
1749 }
1750 }
1751
1752 /* opcode selection */
1753 if (emitInfo->EmitHighLevelInstructions) {
1754 opcode = (n->Opcode == IR_CONT) ? OPCODE_CONT : OPCODE_BRK;
1755 }
1756 else {
1757 opcode = OPCODE_BRA;
1758 }
1759 n->InstLocation = emitInfo->prog->NumInstructions;
1760 inst = new_instruction(emitInfo, opcode);
1761 inst->DstReg.CondMask = COND_TR; /* always true */
1762 return inst;
1763 }
1764
1765
1766 /**
1767 * Conditional "continue" or "break" statement.
1768 * Either OPCODE_CONT, OPCODE_BRK or OPCODE_BRA will be emitted.
1769 */
1770 static struct prog_instruction *
1771 emit_cont_break_if_true(slang_emit_info *emitInfo, slang_ir_node *n)
1772 {
1773 struct prog_instruction *inst;
1774
1775 assert(n->Opcode == IR_CONT_IF_TRUE ||
1776 n->Opcode == IR_BREAK_IF_TRUE);
1777
1778 /* evaluate condition expr, setting cond codes */
1779 inst = emit(emitInfo, n->Children[0]);
1780 if (emitInfo->EmitCondCodes) {
1781 assert(inst);
1782 inst->CondUpdate = GL_TRUE;
1783 }
1784
1785 n->InstLocation = emitInfo->prog->NumInstructions;
1786
1787 /* opcode selection */
1788 if (emitInfo->EmitHighLevelInstructions) {
1789 const gl_inst_opcode opcode
1790 = (n->Opcode == IR_CONT_IF_TRUE) ? OPCODE_CONT : OPCODE_BRK;
1791 if (emitInfo->EmitCondCodes) {
1792 /* Get the writemask from the previous instruction which set
1793 * the condcodes. Use that writemask as the CondSwizzle.
1794 */
1795 const GLuint condWritemask = inst->DstReg.WriteMask;
1796 inst = new_instruction(emitInfo, opcode);
1797 inst->DstReg.CondMask = COND_NE;
1798 inst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1799 return inst;
1800 }
1801 else {
1802 /* IF reg
1803 * BRK/CONT;
1804 * ENDIF
1805 */
1806 GLint ifInstLoc;
1807 ifInstLoc = emitInfo->prog->NumInstructions;
1808 inst = emit_instruction(emitInfo, OPCODE_IF,
1809 NULL, /* dest */
1810 n->Children[0]->Store,
1811 NULL,
1812 NULL);
1813 n->InstLocation = emitInfo->prog->NumInstructions;
1814
1815 inst = new_instruction(emitInfo, opcode);
1816 inst = new_instruction(emitInfo, OPCODE_ENDIF);
1817
1818 emitInfo->prog->Instructions[ifInstLoc].BranchTarget
1819 = emitInfo->prog->NumInstructions;
1820 return inst;
1821 }
1822 }
1823 else {
1824 const GLuint condWritemask = inst->DstReg.WriteMask;
1825 assert(emitInfo->EmitCondCodes);
1826 inst = new_instruction(emitInfo, OPCODE_BRA);
1827 inst->DstReg.CondMask = COND_NE;
1828 inst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1829 return inst;
1830 }
1831 }
1832
1833
1834 /**
1835 * Return the size of a swizzle mask given that some swizzle components
1836 * may be NIL/undefined. For example:
1837 * swizzle_size(".zzxx") = 4
1838 * swizzle_size(".xy??") = 2
1839 * swizzle_size(".w???") = 1
1840 */
1841 static GLuint
1842 swizzle_size(GLuint swizzle)
1843 {
1844 GLuint i;
1845 for (i = 0; i < 4; i++) {
1846 if (GET_SWZ(swizzle, i) == SWIZZLE_NIL)
1847 return i;
1848 }
1849 return 4;
1850 }
1851
1852
1853 static struct prog_instruction *
1854 emit_swizzle(slang_emit_info *emitInfo, slang_ir_node *n)
1855 {
1856 struct prog_instruction *inst;
1857
1858 inst = emit(emitInfo, n->Children[0]);
1859
1860 if (!n->Store->Parent) {
1861 /* this covers a case such as "(b ? p : q).x" */
1862 n->Store->Parent = n->Children[0]->Store;
1863 assert(n->Store->Parent);
1864 }
1865
1866 {
1867 const GLuint swizzle = n->Store->Swizzle;
1868 /* new storage is parent storage with updated Swizzle + Size fields */
1869 _slang_copy_ir_storage(n->Store, n->Store->Parent);
1870 /* Apply this node's swizzle to parent's storage */
1871 n->Store->Swizzle = _slang_swizzle_swizzle(n->Store->Swizzle, swizzle);
1872 /* Update size */
1873 n->Store->Size = swizzle_size(n->Store->Swizzle);
1874 }
1875
1876 assert(!n->Store->Parent);
1877 assert(n->Store->Index >= 0);
1878
1879 return inst;
1880 }
1881
1882
1883 /**
1884 * Dereference array element: element == array[index]
1885 * This basically involves emitting code for computing the array index
1886 * and updating the node/element's storage info.
1887 */
1888 static struct prog_instruction *
1889 emit_array_element(slang_emit_info *emitInfo, slang_ir_node *n)
1890 {
1891 slang_ir_storage *arrayStore, *indexStore;
1892 const int elemSize = n->Store->Size; /* number of floats */
1893 const GLint elemSizeVec = (elemSize + 3) / 4; /* number of vec4 */
1894 struct prog_instruction *inst;
1895
1896 assert(n->Opcode == IR_ELEMENT);
1897 assert(elemSize > 0);
1898
1899 /* special case for built-in state variables, like light state */
1900 {
1901 slang_ir_storage *root = n->Store;
1902 assert(!root->Parent);
1903 while (root->Parent)
1904 root = root->Parent;
1905
1906 if (root->File == PROGRAM_STATE_VAR) {
1907 GLboolean direct;
1908 GLint index =
1909 _slang_alloc_statevar(n, emitInfo->prog->Parameters, &direct);
1910 if (index < 0) {
1911 /* error */
1912 return NULL;
1913 }
1914 if (direct) {
1915 n->Store->Index = index;
1916 return NULL; /* all done */
1917 }
1918 }
1919 }
1920
1921 /* do codegen for array itself */
1922 emit(emitInfo, n->Children[0]);
1923 arrayStore = n->Children[0]->Store;
1924
1925 /* The initial array element storage is the array's storage,
1926 * then modified below.
1927 */
1928 _slang_copy_ir_storage(n->Store, arrayStore);
1929
1930
1931 if (n->Children[1]->Opcode == IR_FLOAT) {
1932 /* Constant array index */
1933 const GLint element = (GLint) n->Children[1]->Value[0];
1934
1935 /* this element's storage is the array's storage, plus constant offset */
1936 n->Store->Index += elemSizeVec * element;
1937 }
1938 else {
1939 /* Variable array index */
1940
1941 /* do codegen for array index expression */
1942 emit(emitInfo, n->Children[1]);
1943 indexStore = n->Children[1]->Store;
1944
1945 if (indexStore->IsIndirect) {
1946 /* need to put the array index into a temporary since we can't
1947 * directly support a[b[i]] constructs.
1948 */
1949
1950
1951 /*indexStore = tempstore();*/
1952 }
1953
1954
1955 if (elemSize > 4) {
1956 /* need to multiply array index by array element size */
1957 struct prog_instruction *inst;
1958 slang_ir_storage *indexTemp;
1959 slang_ir_storage elemSizeStore;
1960
1961 /* allocate 1 float indexTemp */
1962 indexTemp = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, 1);
1963 _slang_alloc_temp(emitInfo->vt, indexTemp);
1964
1965 /* allocate a constant containing the element size */
1966 constant_to_storage(emitInfo, (float) elemSizeVec, &elemSizeStore);
1967
1968 /* multiply array index by element size */
1969 inst = emit_instruction(emitInfo,
1970 OPCODE_MUL,
1971 indexTemp, /* dest */
1972 indexStore, /* the index */
1973 &elemSizeStore,
1974 NULL);
1975
1976 indexStore = indexTemp;
1977 }
1978
1979 if (arrayStore->IsIndirect) {
1980 /* ex: in a[i][j], a[i] (the arrayStore) is indirect */
1981 /* Need to add indexStore to arrayStore->Indirect store */
1982 slang_ir_storage indirectArray;
1983 slang_ir_storage *indexTemp;
1984
1985 _slang_init_ir_storage(&indirectArray,
1986 arrayStore->IndirectFile,
1987 arrayStore->IndirectIndex,
1988 1,
1989 arrayStore->IndirectSwizzle);
1990
1991 /* allocate 1 float indexTemp */
1992 indexTemp = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, 1);
1993 _slang_alloc_temp(emitInfo->vt, indexTemp);
1994
1995 inst = emit_instruction(emitInfo,
1996 OPCODE_ADD,
1997 indexTemp, /* dest */
1998 indexStore, /* the index */
1999 &indirectArray, /* indirect array base */
2000 NULL);
2001
2002 indexStore = indexTemp;
2003 }
2004
2005 /* update the array element storage info */
2006 n->Store->IsIndirect = GL_TRUE;
2007 n->Store->IndirectFile = indexStore->File;
2008 n->Store->IndirectIndex = indexStore->Index;
2009 n->Store->IndirectSwizzle = indexStore->Swizzle;
2010 }
2011
2012 n->Store->Size = elemSize;
2013 n->Store->Swizzle = _slang_var_swizzle(elemSize, 0);
2014
2015 return NULL; /* no instruction */
2016 }
2017
2018
2019 /**
2020 * Resolve storage for accessing a structure field.
2021 */
2022 static struct prog_instruction *
2023 emit_struct_field(slang_emit_info *emitInfo, slang_ir_node *n)
2024 {
2025 slang_ir_storage *root = n->Store;
2026 GLint fieldOffset, fieldSize;
2027
2028 assert(n->Opcode == IR_FIELD);
2029
2030 assert(!root->Parent);
2031 while (root->Parent)
2032 root = root->Parent;
2033
2034 /* If this is the field of a state var, allocate constant/uniform
2035 * storage for it now if we haven't already.
2036 * Note that we allocate storage (uniform/constant slots) for state
2037 * variables here rather than at declaration time so we only allocate
2038 * space for the ones that we actually use!
2039 */
2040 if (root->File == PROGRAM_STATE_VAR) {
2041 GLboolean direct;
2042 GLint index = _slang_alloc_statevar(n, emitInfo->prog->Parameters, &direct);
2043 if (index < 0) {
2044 slang_info_log_error(emitInfo->log, "Error parsing state variable");
2045 return NULL;
2046 }
2047 if (direct) {
2048 root->Index = index;
2049 return NULL; /* all done */
2050 }
2051 }
2052
2053 /* do codegen for struct */
2054 emit(emitInfo, n->Children[0]);
2055 assert(n->Children[0]->Store->Index >= 0);
2056
2057
2058 fieldOffset = n->Store->Index;
2059 fieldSize = n->Store->Size;
2060
2061 _slang_copy_ir_storage(n->Store, n->Children[0]->Store);
2062
2063 n->Store->Index = n->Children[0]->Store->Index + fieldOffset / 4;
2064 n->Store->Size = fieldSize;
2065
2066 switch (fieldSize) {
2067 case 1:
2068 {
2069 GLint swz = fieldOffset % 4;
2070 n->Store->Swizzle = MAKE_SWIZZLE4(swz, swz, swz, swz);
2071 }
2072 break;
2073 case 2:
2074 n->Store->Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
2075 SWIZZLE_NIL, SWIZZLE_NIL);
2076 break;
2077 case 3:
2078 n->Store->Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
2079 SWIZZLE_Z, SWIZZLE_NIL);
2080 break;
2081 default:
2082 n->Store->Swizzle = SWIZZLE_XYZW;
2083 }
2084
2085 assert(n->Store->Index >= 0);
2086
2087 return NULL; /* no instruction */
2088 }
2089
2090
2091 /**
2092 * Emit code for a variable declaration.
2093 * This usually doesn't result in any code generation, but just
2094 * memory allocation.
2095 */
2096 static struct prog_instruction *
2097 emit_var_decl(slang_emit_info *emitInfo, slang_ir_node *n)
2098 {
2099 assert(n->Store);
2100 assert(n->Store->File != PROGRAM_UNDEFINED);
2101 assert(n->Store->Size > 0);
2102 /*assert(n->Store->Index < 0);*/
2103
2104 if (!n->Var || n->Var->isTemp) {
2105 /* a nameless/temporary variable, will be freed after first use */
2106 /*NEW*/
2107 if (n->Store->Index < 0 && !_slang_alloc_temp(emitInfo->vt, n->Store)) {
2108 slang_info_log_error(emitInfo->log,
2109 "Ran out of registers, too many temporaries");
2110 return NULL;
2111 }
2112 }
2113 else {
2114 /* a regular variable */
2115 _slang_add_variable(emitInfo->vt, n->Var);
2116 if (!_slang_alloc_var(emitInfo->vt, n->Store)) {
2117 slang_info_log_error(emitInfo->log,
2118 "Ran out of registers, too many variables");
2119 return NULL;
2120 }
2121 /*
2122 printf("IR_VAR_DECL %s %d store %p\n",
2123 (char*) n->Var->a_name, n->Store->Index, (void*) n->Store);
2124 */
2125 assert(n->Var->store == n->Store);
2126 }
2127 if (emitInfo->EmitComments) {
2128 /* emit NOP with comment describing the variable's storage location */
2129 char s[1000];
2130 sprintf(s, "TEMP[%d]%s = variable %s (size %d)",
2131 n->Store->Index,
2132 _mesa_swizzle_string(n->Store->Swizzle, 0, GL_FALSE),
2133 (n->Var ? (char *) n->Var->a_name : "anonymous"),
2134 n->Store->Size);
2135 emit_comment(emitInfo, s);
2136 }
2137 return NULL;
2138 }
2139
2140
2141 /**
2142 * Emit code for a reference to a variable.
2143 * Actually, no code is generated but we may do some memory allocation.
2144 * In particular, state vars (uniforms) are allocated on an as-needed basis.
2145 */
2146 static struct prog_instruction *
2147 emit_var_ref(slang_emit_info *emitInfo, slang_ir_node *n)
2148 {
2149 assert(n->Store);
2150 assert(n->Store->File != PROGRAM_UNDEFINED);
2151
2152 if (n->Store->File == PROGRAM_STATE_VAR && n->Store->Index < 0) {
2153 GLboolean direct;
2154 GLint index = _slang_alloc_statevar(n, emitInfo->prog->Parameters, &direct);
2155 if (index < 0) {
2156 /* error */
2157 char s[100];
2158 _mesa_snprintf(s, sizeof(s), "Undefined variable '%s'",
2159 (char *) n->Var->a_name);
2160 slang_info_log_error(emitInfo->log, s);
2161 return NULL;
2162 }
2163
2164 n->Store->Index = index;
2165 }
2166 else if (n->Store->File == PROGRAM_UNIFORM ||
2167 n->Store->File == PROGRAM_SAMPLER) {
2168 /* mark var as used */
2169 _mesa_use_uniform(emitInfo->prog->Parameters, (char *) n->Var->a_name);
2170 }
2171 else if (n->Store->File == PROGRAM_INPUT) {
2172 assert(n->Store->Index >= 0);
2173 emitInfo->prog->InputsRead |= (1 << n->Store->Index);
2174 }
2175
2176 if (n->Store->Index < 0) {
2177 /* probably ran out of registers */
2178 return NULL;
2179 }
2180 assert(n->Store->Size > 0);
2181
2182 return NULL;
2183 }
2184
2185
2186 static struct prog_instruction *
2187 emit(slang_emit_info *emitInfo, slang_ir_node *n)
2188 {
2189 struct prog_instruction *inst;
2190 if (!n)
2191 return NULL;
2192
2193 if (emitInfo->log->error_flag) {
2194 return NULL;
2195 }
2196
2197 if (n->Comment) {
2198 inst = new_instruction(emitInfo, OPCODE_NOP);
2199 inst->Comment = _mesa_strdup(n->Comment);
2200 inst = NULL;
2201 }
2202
2203 switch (n->Opcode) {
2204 case IR_SEQ:
2205 /* sequence of two sub-trees */
2206 assert(n->Children[0]);
2207 assert(n->Children[1]);
2208 emit(emitInfo, n->Children[0]);
2209 if (emitInfo->log->error_flag)
2210 return NULL;
2211 inst = emit(emitInfo, n->Children[1]);
2212 #if 0
2213 assert(!n->Store);
2214 #endif
2215 n->Store = n->Children[1]->Store;
2216 return inst;
2217
2218 case IR_SCOPE:
2219 /* new variable scope */
2220 _slang_push_var_table(emitInfo->vt);
2221 inst = emit(emitInfo, n->Children[0]);
2222 _slang_pop_var_table(emitInfo->vt);
2223 return inst;
2224
2225 case IR_VAR_DECL:
2226 /* Variable declaration - allocate a register for it */
2227 inst = emit_var_decl(emitInfo, n);
2228 return inst;
2229
2230 case IR_VAR:
2231 /* Reference to a variable
2232 * Storage should have already been resolved/allocated.
2233 */
2234 return emit_var_ref(emitInfo, n);
2235
2236 case IR_ELEMENT:
2237 return emit_array_element(emitInfo, n);
2238 case IR_FIELD:
2239 return emit_struct_field(emitInfo, n);
2240 case IR_SWIZZLE:
2241 return emit_swizzle(emitInfo, n);
2242
2243 /* Simple arithmetic */
2244 /* unary */
2245 case IR_MOVE:
2246 case IR_RSQ:
2247 case IR_RCP:
2248 case IR_FLOOR:
2249 case IR_FRAC:
2250 case IR_F_TO_I:
2251 case IR_I_TO_F:
2252 case IR_ABS:
2253 case IR_SIN:
2254 case IR_COS:
2255 case IR_DDX:
2256 case IR_DDY:
2257 case IR_EXP:
2258 case IR_EXP2:
2259 case IR_LOG2:
2260 case IR_NOISE1:
2261 case IR_NOISE2:
2262 case IR_NOISE3:
2263 case IR_NOISE4:
2264 case IR_NRM4:
2265 case IR_NRM3:
2266 /* binary */
2267 case IR_ADD:
2268 case IR_SUB:
2269 case IR_MUL:
2270 case IR_DOT4:
2271 case IR_DOT3:
2272 case IR_DOT2:
2273 case IR_CROSS:
2274 case IR_MIN:
2275 case IR_MAX:
2276 case IR_SEQUAL:
2277 case IR_SNEQUAL:
2278 case IR_SGE:
2279 case IR_SGT:
2280 case IR_SLE:
2281 case IR_SLT:
2282 case IR_POW:
2283 /* trinary operators */
2284 case IR_LRP:
2285 return emit_arith(emitInfo, n);
2286
2287 case IR_EQUAL:
2288 case IR_NOTEQUAL:
2289 return emit_compare(emitInfo, n);
2290
2291 case IR_CLAMP:
2292 return emit_clamp(emitInfo, n);
2293 case IR_TEX:
2294 case IR_TEXB:
2295 case IR_TEXP:
2296 case IR_TEX_SH:
2297 case IR_TEXB_SH:
2298 case IR_TEXP_SH:
2299 return emit_tex(emitInfo, n);
2300 case IR_NEG:
2301 return emit_negation(emitInfo, n);
2302 case IR_FLOAT:
2303 /* find storage location for this float constant */
2304 n->Store->Index = _mesa_add_unnamed_constant(emitInfo->prog->Parameters,
2305 n->Value,
2306 n->Store->Size,
2307 &n->Store->Swizzle);
2308 if (n->Store->Index < 0) {
2309 slang_info_log_error(emitInfo->log, "Ran out of space for constants");
2310 return NULL;
2311 }
2312 return NULL;
2313
2314 case IR_COPY:
2315 return emit_copy(emitInfo, n);
2316
2317 case IR_COND:
2318 return emit_cond(emitInfo, n);
2319
2320 case IR_NOT:
2321 return emit_not(emitInfo, n);
2322
2323 case IR_LABEL:
2324 return emit_label(emitInfo, n);
2325
2326 case IR_KILL:
2327 return emit_kill(emitInfo);
2328
2329 case IR_CALL:
2330 /* new variable scope for subroutines/function calls */
2331 _slang_push_var_table(emitInfo->vt);
2332 inst = emit_fcall(emitInfo, n);
2333 _slang_pop_var_table(emitInfo->vt);
2334 return inst;
2335
2336 case IR_IF:
2337 return emit_if(emitInfo, n);
2338
2339 case IR_LOOP:
2340 return emit_loop(emitInfo, n);
2341 case IR_BREAK_IF_TRUE:
2342 case IR_CONT_IF_TRUE:
2343 return emit_cont_break_if_true(emitInfo, n);
2344 case IR_BREAK:
2345 /* fall-through */
2346 case IR_CONT:
2347 return emit_cont_break(emitInfo, n);
2348
2349 case IR_BEGIN_SUB:
2350 return new_instruction(emitInfo, OPCODE_BGNSUB);
2351 case IR_END_SUB:
2352 return new_instruction(emitInfo, OPCODE_ENDSUB);
2353 case IR_RETURN:
2354 return emit_return(emitInfo, n);
2355
2356 case IR_NOP:
2357 return NULL;
2358
2359 default:
2360 _mesa_problem(NULL, "Unexpected IR opcode in emit()\n");
2361 }
2362 return NULL;
2363 }
2364
2365
2366 /**
2367 * After code generation, any subroutines will be in separate program
2368 * objects. This function appends all the subroutines onto the main
2369 * program and resolves the linking of all the branch/call instructions.
2370 * XXX this logic should really be part of the linking process...
2371 */
2372 static void
2373 _slang_resolve_subroutines(slang_emit_info *emitInfo)
2374 {
2375 GET_CURRENT_CONTEXT(ctx);
2376 struct gl_program *mainP = emitInfo->prog;
2377 GLuint *subroutineLoc, i, total;
2378
2379 subroutineLoc
2380 = (GLuint *) _mesa_malloc(emitInfo->NumSubroutines * sizeof(GLuint));
2381
2382 /* total number of instructions */
2383 total = mainP->NumInstructions;
2384 for (i = 0; i < emitInfo->NumSubroutines; i++) {
2385 subroutineLoc[i] = total;
2386 total += emitInfo->Subroutines[i]->NumInstructions;
2387 }
2388
2389 /* adjust BranchTargets within the functions */
2390 for (i = 0; i < emitInfo->NumSubroutines; i++) {
2391 struct gl_program *sub = emitInfo->Subroutines[i];
2392 GLuint j;
2393 for (j = 0; j < sub->NumInstructions; j++) {
2394 struct prog_instruction *inst = sub->Instructions + j;
2395 if (inst->Opcode != OPCODE_CAL && inst->BranchTarget >= 0) {
2396 inst->BranchTarget += subroutineLoc[i];
2397 }
2398 }
2399 }
2400
2401 /* append subroutines' instructions after main's instructions */
2402 mainP->Instructions = _mesa_realloc_instructions(mainP->Instructions,
2403 mainP->NumInstructions,
2404 total);
2405 mainP->NumInstructions = total;
2406 for (i = 0; i < emitInfo->NumSubroutines; i++) {
2407 struct gl_program *sub = emitInfo->Subroutines[i];
2408 _mesa_copy_instructions(mainP->Instructions + subroutineLoc[i],
2409 sub->Instructions,
2410 sub->NumInstructions);
2411 /* delete subroutine code */
2412 sub->Parameters = NULL; /* prevent double-free */
2413 _mesa_reference_program(ctx, &emitInfo->Subroutines[i], NULL);
2414 }
2415
2416 /* free subroutine list */
2417 if (emitInfo->Subroutines) {
2418 _mesa_free(emitInfo->Subroutines);
2419 emitInfo->Subroutines = NULL;
2420 }
2421 emitInfo->NumSubroutines = 0;
2422
2423 /* Examine CAL instructions.
2424 * At this point, the BranchTarget field of the CAL instruction is
2425 * the number/id of the subroutine to call (an index into the
2426 * emitInfo->Subroutines list).
2427 * Translate that into an actual instruction location now.
2428 */
2429 for (i = 0; i < mainP->NumInstructions; i++) {
2430 struct prog_instruction *inst = mainP->Instructions + i;
2431 if (inst->Opcode == OPCODE_CAL) {
2432 const GLuint f = inst->BranchTarget;
2433 inst->BranchTarget = subroutineLoc[f];
2434 }
2435 }
2436
2437 _mesa_free(subroutineLoc);
2438 }
2439
2440
2441
2442 /**
2443 * Convert the IR tree into GPU instructions.
2444 * \param n root of IR tree
2445 * \param vt variable table
2446 * \param prog program to put GPU instructions into
2447 * \param pragmas controls codegen options
2448 * \param withEnd if true, emit END opcode at end
2449 * \param log log for emitting errors/warnings/info
2450 */
2451 GLboolean
2452 _slang_emit_code(slang_ir_node *n, slang_var_table *vt,
2453 struct gl_program *prog,
2454 const struct gl_sl_pragmas *pragmas,
2455 GLboolean withEnd,
2456 slang_info_log *log)
2457 {
2458 GET_CURRENT_CONTEXT(ctx);
2459 GLboolean success;
2460 slang_emit_info emitInfo;
2461 GLuint maxUniforms;
2462
2463 emitInfo.log = log;
2464 emitInfo.vt = vt;
2465 emitInfo.prog = prog;
2466 emitInfo.Subroutines = NULL;
2467 emitInfo.NumSubroutines = 0;
2468 emitInfo.MaxInstructions = prog->NumInstructions;
2469
2470 emitInfo.EmitHighLevelInstructions = ctx->Shader.EmitHighLevelInstructions;
2471 emitInfo.EmitCondCodes = ctx->Shader.EmitCondCodes;
2472 emitInfo.EmitComments = ctx->Shader.EmitComments || pragmas->Debug;
2473 emitInfo.EmitBeginEndSub = GL_TRUE;
2474
2475 if (!emitInfo.EmitCondCodes) {
2476 emitInfo.EmitHighLevelInstructions = GL_TRUE;
2477 }
2478
2479 /* Check uniform/constant limits */
2480 if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
2481 maxUniforms = ctx->Const.FragmentProgram.MaxUniformComponents / 4;
2482 }
2483 else {
2484 assert(prog->Target == GL_VERTEX_PROGRAM_ARB);
2485 maxUniforms = ctx->Const.VertexProgram.MaxUniformComponents / 4;
2486 }
2487 if (prog->Parameters->NumParameters > maxUniforms) {
2488 slang_info_log_error(log, "Constant/uniform register limit exceeded "
2489 "(max=%u vec4)", maxUniforms);
2490
2491 return GL_FALSE;
2492 }
2493
2494 (void) emit(&emitInfo, n);
2495
2496 /* finish up by adding the END opcode to program */
2497 if (withEnd) {
2498 struct prog_instruction *inst;
2499 inst = new_instruction(&emitInfo, OPCODE_END);
2500 }
2501
2502 _slang_resolve_subroutines(&emitInfo);
2503
2504 success = GL_TRUE;
2505
2506 #if 0
2507 printf("*********** End emit code (%u inst):\n", prog->NumInstructions);
2508 _mesa_print_program(prog);
2509 _mesa_print_program_parameters(ctx,prog);
2510 #endif
2511
2512 return success;
2513 }