Merge master and fix conflicts
[mesa.git] / src / mesa / shader / slang / slang_emit.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2005-2008 Brian Paul All Rights Reserved.
5 * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file slang_emit.c
27 * Emit program instructions (PI code) from IR trees.
28 * \author Brian Paul
29 */
30
31 /***
32 *** NOTES
33 ***
34 *** To emit GPU instructions, we basically just do an in-order traversal
35 *** of the IR tree.
36 ***/
37
38
39 #include "main/imports.h"
40 #include "main/context.h"
41 #include "main/macros.h"
42 #include "shader/program.h"
43 #include "shader/prog_instruction.h"
44 #include "shader/prog_parameter.h"
45 #include "shader/prog_print.h"
46 #include "slang_builtin.h"
47 #include "slang_emit.h"
48 #include "slang_mem.h"
49
50
51 #define PEEPHOLE_OPTIMIZATIONS 1
52 #define ANNOTATE 0
53
54
55 typedef struct
56 {
57 slang_info_log *log;
58 slang_var_table *vt;
59 struct gl_program *prog;
60 struct gl_program **Subroutines;
61 GLuint NumSubroutines;
62
63 GLuint MaxInstructions; /**< size of prog->Instructions[] buffer */
64
65 GLboolean UnresolvedFunctions;
66
67 /* code-gen options */
68 GLboolean EmitHighLevelInstructions;
69 GLboolean EmitCondCodes;
70 GLboolean EmitComments;
71 GLboolean EmitBeginEndSub; /* XXX TEMPORARY */
72 } slang_emit_info;
73
74
75
76 static struct gl_program *
77 new_subroutine(slang_emit_info *emitInfo, GLuint *id)
78 {
79 GET_CURRENT_CONTEXT(ctx);
80 const GLuint n = emitInfo->NumSubroutines;
81
82 emitInfo->Subroutines = (struct gl_program **)
83 _mesa_realloc(emitInfo->Subroutines,
84 n * sizeof(struct gl_program),
85 (n + 1) * sizeof(struct gl_program));
86 emitInfo->Subroutines[n] = ctx->Driver.NewProgram(ctx, emitInfo->prog->Target, 0);
87 emitInfo->Subroutines[n]->Parameters = emitInfo->prog->Parameters;
88 emitInfo->NumSubroutines++;
89 *id = n;
90 return emitInfo->Subroutines[n];
91 }
92
93
94 /**
95 * Convert a writemask to a swizzle. Used for testing cond codes because
96 * we only want to test the cond code component(s) that was set by the
97 * previous instruction.
98 */
99 static GLuint
100 writemask_to_swizzle(GLuint writemask)
101 {
102 if (writemask == WRITEMASK_X)
103 return SWIZZLE_XXXX;
104 if (writemask == WRITEMASK_Y)
105 return SWIZZLE_YYYY;
106 if (writemask == WRITEMASK_Z)
107 return SWIZZLE_ZZZZ;
108 if (writemask == WRITEMASK_W)
109 return SWIZZLE_WWWW;
110 return SWIZZLE_XYZW; /* shouldn't be hit */
111 }
112
113
114 /**
115 * Convert a swizzle mask to a writemask.
116 * Note that the slang_ir_storage->Swizzle field can represent either a
117 * swizzle mask or a writemask, depending on how it's used. For example,
118 * when we parse "direction.yz" alone, we don't know whether .yz is a
119 * writemask or a swizzle. In this case, we encode ".yz" in store->Swizzle
120 * as a swizzle mask (.yz?? actually). Later, if direction.yz is used as
121 * an R-value, we use store->Swizzle as-is. Otherwise, if direction.yz is
122 * used as an L-value, we convert it to a writemask.
123 */
124 static GLuint
125 swizzle_to_writemask(GLuint swizzle)
126 {
127 GLuint i, writemask = 0x0;
128 for (i = 0; i < 4; i++) {
129 GLuint swz = GET_SWZ(swizzle, i);
130 if (swz <= SWIZZLE_W) {
131 writemask |= (1 << swz);
132 }
133 }
134 return writemask;
135 }
136
137
138 /**
139 * Swizzle a swizzle (function composition).
140 * That is, return swz2(swz1), or said another way: swz1.szw2
141 * Example: swizzle_swizzle(".zwxx", ".xxyw") yields ".zzwx"
142 */
143 GLuint
144 _slang_swizzle_swizzle(GLuint swz1, GLuint swz2)
145 {
146 GLuint i, swz, s[4];
147 for (i = 0; i < 4; i++) {
148 GLuint c = GET_SWZ(swz2, i);
149 if (c <= SWIZZLE_W)
150 s[i] = GET_SWZ(swz1, c);
151 else
152 s[i] = c;
153 }
154 swz = MAKE_SWIZZLE4(s[0], s[1], s[2], s[3]);
155 return swz;
156 }
157
158
159 /**
160 * Return the default swizzle mask for accessing a variable of the
161 * given size (in floats). If size = 1, comp is used to identify
162 * which component [0..3] of the register holds the variable.
163 */
164 GLuint
165 _slang_var_swizzle(GLint size, GLint comp)
166 {
167 switch (size) {
168 case 1:
169 return MAKE_SWIZZLE4(comp, SWIZZLE_NIL, SWIZZLE_NIL, SWIZZLE_NIL);
170 case 2:
171 return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_NIL, SWIZZLE_NIL);
172 case 3:
173 return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_NIL);
174 default:
175 return SWIZZLE_XYZW;
176 }
177 }
178
179
180
181 /**
182 * Allocate storage for the given node (if it hasn't already been allocated).
183 *
184 * Typically this is temporary storage for an intermediate result (such as
185 * for a multiply or add, etc).
186 *
187 * If n->Store does not exist it will be created and will be of the size
188 * specified by defaultSize.
189 */
190 static GLboolean
191 alloc_node_storage(slang_emit_info *emitInfo, slang_ir_node *n,
192 GLint defaultSize)
193 {
194 assert(!n->Var);
195 if (!n->Store) {
196 assert(defaultSize > 0);
197 n->Store = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, defaultSize);
198 }
199
200 /* now allocate actual register(s). I.e. set n->Store->Index >= 0 */
201 if (n->Store->Index < 0) {
202 if (!_slang_alloc_temp(emitInfo->vt, n->Store)) {
203 slang_info_log_error(emitInfo->log,
204 "Ran out of registers, too many temporaries");
205 _slang_free(n->Store);
206 n->Store = NULL;
207 return GL_FALSE;
208 }
209 }
210 return GL_TRUE;
211 }
212
213
214 /**
215 * Free temporary storage, if n->Store is, in fact, temp storage.
216 * Otherwise, no-op.
217 */
218 static void
219 free_node_storage(slang_var_table *vt, slang_ir_node *n)
220 {
221 if (n->Store->File == PROGRAM_TEMPORARY &&
222 n->Store->Index >= 0 &&
223 n->Opcode != IR_SWIZZLE) {
224 if (_slang_is_temp(vt, n->Store)) {
225 _slang_free_temp(vt, n->Store);
226 n->Store->Index = -1;
227 n->Store = NULL; /* XXX this may not be needed */
228 }
229 }
230 }
231
232
233 /**
234 * Helper function to allocate a short-term temporary.
235 * Free it with _slang_free_temp().
236 */
237 static GLboolean
238 alloc_local_temp(slang_emit_info *emitInfo, slang_ir_storage *temp, GLint size)
239 {
240 assert(size >= 1);
241 assert(size <= 4);
242 _mesa_bzero(temp, sizeof(*temp));
243 temp->Size = size;
244 temp->File = PROGRAM_TEMPORARY;
245 temp->Index = -1;
246 return _slang_alloc_temp(emitInfo->vt, temp);
247 }
248
249
250 /**
251 * Remove any SWIZZLE_NIL terms from given swizzle mask.
252 * For a swizzle like .z??? generate .zzzz (replicate single component).
253 * Else, for .wx?? generate .wxzw (insert default component for the position).
254 */
255 static GLuint
256 fix_swizzle(GLuint swizzle)
257 {
258 GLuint c0 = GET_SWZ(swizzle, 0),
259 c1 = GET_SWZ(swizzle, 1),
260 c2 = GET_SWZ(swizzle, 2),
261 c3 = GET_SWZ(swizzle, 3);
262 if (c1 == SWIZZLE_NIL && c2 == SWIZZLE_NIL && c3 == SWIZZLE_NIL) {
263 /* smear first component across all positions */
264 c1 = c2 = c3 = c0;
265 }
266 else {
267 /* insert default swizzle components */
268 if (c0 == SWIZZLE_NIL)
269 c0 = SWIZZLE_X;
270 if (c1 == SWIZZLE_NIL)
271 c1 = SWIZZLE_Y;
272 if (c2 == SWIZZLE_NIL)
273 c2 = SWIZZLE_Z;
274 if (c3 == SWIZZLE_NIL)
275 c3 = SWIZZLE_W;
276 }
277 return MAKE_SWIZZLE4(c0, c1, c2, c3);
278 }
279
280
281
282 /**
283 * Convert IR storage to an instruction dst register.
284 */
285 static void
286 storage_to_dst_reg(struct prog_dst_register *dst, const slang_ir_storage *st)
287 {
288 const GLboolean relAddr = st->RelAddr;
289 const GLint size = st->Size;
290 GLint index = st->Index;
291 GLuint swizzle = st->Swizzle;
292
293 assert(index >= 0);
294 /* if this is storage relative to some parent storage, walk up the tree */
295 while (st->Parent) {
296 st = st->Parent;
297 assert(st->Index >= 0);
298 index += st->Index;
299 swizzle = _slang_swizzle_swizzle(st->Swizzle, swizzle);
300 }
301
302 assert(st->File != PROGRAM_UNDEFINED);
303 dst->File = st->File;
304
305 assert(index >= 0);
306 dst->Index = index;
307
308 assert(size >= 1);
309 assert(size <= 4);
310
311 if (swizzle != SWIZZLE_XYZW) {
312 dst->WriteMask = swizzle_to_writemask(swizzle);
313 }
314 else {
315 switch (size) {
316 case 1:
317 dst->WriteMask = WRITEMASK_X << GET_SWZ(st->Swizzle, 0);
318 break;
319 case 2:
320 dst->WriteMask = WRITEMASK_XY;
321 break;
322 case 3:
323 dst->WriteMask = WRITEMASK_XYZ;
324 break;
325 case 4:
326 dst->WriteMask = WRITEMASK_XYZW;
327 break;
328 default:
329 ; /* error would have been caught above */
330 }
331 }
332
333 dst->RelAddr = relAddr;
334 }
335
336
337 /**
338 * Convert IR storage to an instruction src register.
339 */
340 static void
341 storage_to_src_reg(struct prog_src_register *src, const slang_ir_storage *st)
342 {
343 const GLboolean relAddr = st->RelAddr;
344 GLint index = st->Index;
345 GLuint swizzle = st->Swizzle;
346
347 /* if this is storage relative to some parent storage, walk up the tree */
348 assert(index >= 0);
349 while (st->Parent) {
350 st = st->Parent;
351 if (st->Index < 0) {
352 /* an error should have been reported already */
353 return;
354 }
355 assert(st->Index >= 0);
356 index += st->Index;
357 swizzle = _slang_swizzle_swizzle(fix_swizzle(st->Swizzle), swizzle);
358 }
359
360 assert(st->File >= 0);
361 #if 1 /* XXX temporary */
362 if (st->File == PROGRAM_UNDEFINED) {
363 slang_ir_storage *st0 = (slang_ir_storage *) st;
364 st0->File = PROGRAM_TEMPORARY;
365 }
366 #endif
367 assert(st->File < PROGRAM_UNDEFINED);
368 src->File = st->File;
369
370 assert(index >= 0);
371 src->Index = index;
372
373 swizzle = fix_swizzle(swizzle);
374 assert(GET_SWZ(swizzle, 0) <= SWIZZLE_W);
375 assert(GET_SWZ(swizzle, 1) <= SWIZZLE_W);
376 assert(GET_SWZ(swizzle, 2) <= SWIZZLE_W);
377 assert(GET_SWZ(swizzle, 3) <= SWIZZLE_W);
378 src->Swizzle = swizzle;
379
380 src->RelAddr = relAddr;
381 }
382
383
384 /*
385 * Setup storage pointing to a scalar constant/literal.
386 */
387 static void
388 constant_to_storage(slang_emit_info *emitInfo,
389 GLfloat val,
390 slang_ir_storage *store)
391 {
392 GLuint swizzle;
393 GLint reg;
394 GLfloat value[4];
395
396 value[0] = val;
397 reg = _mesa_add_unnamed_constant(emitInfo->prog->Parameters,
398 value, 1, &swizzle);
399
400 memset(store, 0, sizeof(*store));
401 store->File = PROGRAM_CONSTANT;
402 store->Index = reg;
403 store->Swizzle = swizzle;
404 }
405
406
407 /**
408 * Add new instruction at end of given program.
409 * \param prog the program to append instruction onto
410 * \param opcode opcode for the new instruction
411 * \return pointer to the new instruction
412 */
413 static struct prog_instruction *
414 new_instruction(slang_emit_info *emitInfo, gl_inst_opcode opcode)
415 {
416 struct gl_program *prog = emitInfo->prog;
417 struct prog_instruction *inst;
418
419 #if 0
420 /* print prev inst */
421 if (prog->NumInstructions > 0) {
422 _mesa_print_instruction(prog->Instructions + prog->NumInstructions - 1);
423 }
424 #endif
425 assert(prog->NumInstructions <= emitInfo->MaxInstructions);
426
427 if (prog->NumInstructions == emitInfo->MaxInstructions) {
428 /* grow the instruction buffer */
429 emitInfo->MaxInstructions += 20;
430 prog->Instructions =
431 _mesa_realloc_instructions(prog->Instructions,
432 prog->NumInstructions,
433 emitInfo->MaxInstructions);
434 }
435
436 inst = prog->Instructions + prog->NumInstructions;
437 prog->NumInstructions++;
438 _mesa_init_instructions(inst, 1);
439 inst->Opcode = opcode;
440 inst->BranchTarget = -1; /* invalid */
441 /*
442 printf("New inst %d: %p %s\n", prog->NumInstructions-1,(void*)inst,
443 _mesa_opcode_string(inst->Opcode));
444 */
445 return inst;
446 }
447
448
449 static struct prog_instruction *
450 emit_arl_load(slang_emit_info *emitInfo,
451 gl_register_file file, GLint index, GLuint swizzle)
452 {
453 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_ARL);
454 inst->SrcReg[0].File = file;
455 inst->SrcReg[0].Index = index;
456 inst->SrcReg[0].Swizzle = fix_swizzle(swizzle);
457 inst->DstReg.File = PROGRAM_ADDRESS;
458 inst->DstReg.Index = 0;
459 inst->DstReg.WriteMask = WRITEMASK_X;
460 return inst;
461 }
462
463
464 /**
465 * Emit a new instruction with given opcode, operands.
466 * At this point the instruction may have multiple indirect register
467 * loads/stores. We convert those into ARL loads and address-relative
468 * operands. See comments inside.
469 * At some point in the future we could directly emit indirectly addressed
470 * registers in Mesa GPU instructions.
471 */
472 static struct prog_instruction *
473 emit_instruction(slang_emit_info *emitInfo,
474 gl_inst_opcode opcode,
475 const slang_ir_storage *dst,
476 const slang_ir_storage *src0,
477 const slang_ir_storage *src1,
478 const slang_ir_storage *src2)
479 {
480 struct prog_instruction *inst;
481 GLuint numIndirect = 0;
482 const slang_ir_storage *src[3];
483 slang_ir_storage newSrc[3], newDst;
484 GLuint i;
485 GLboolean isTemp[3];
486
487 isTemp[0] = isTemp[1] = isTemp[2] = GL_FALSE;
488
489 src[0] = src0;
490 src[1] = src1;
491 src[2] = src2;
492
493 /* count up how many operands are indirect loads */
494 for (i = 0; i < 3; i++) {
495 if (src[i] && src[i]->IsIndirect)
496 numIndirect++;
497 }
498 if (dst && dst->IsIndirect)
499 numIndirect++;
500
501 /* Take special steps for indirect register loads.
502 * If we had multiple address registers this would be simpler.
503 * For example, this GLSL code:
504 * x[i] = y[j] + z[k];
505 * would translate into something like:
506 * ARL ADDR.x, i;
507 * ARL ADDR.y, j;
508 * ARL ADDR.z, k;
509 * ADD TEMP[ADDR.x+5], TEMP[ADDR.y+9], TEMP[ADDR.z+4];
510 * But since we currently only have one address register we have to do this:
511 * ARL ADDR.x, i;
512 * MOV t1, TEMP[ADDR.x+9];
513 * ARL ADDR.x, j;
514 * MOV t2, TEMP[ADDR.x+4];
515 * ARL ADDR.x, k;
516 * ADD TEMP[ADDR.x+5], t1, t2;
517 * The code here figures this out...
518 */
519 if (numIndirect > 0) {
520 for (i = 0; i < 3; i++) {
521 if (src[i] && src[i]->IsIndirect) {
522 /* load the ARL register with the indirect register */
523 emit_arl_load(emitInfo,
524 src[i]->IndirectFile,
525 src[i]->IndirectIndex,
526 src[i]->IndirectSwizzle);
527
528 if (numIndirect > 1) {
529 /* Need to load src[i] into a temporary register */
530 slang_ir_storage srcRelAddr;
531 alloc_local_temp(emitInfo, &newSrc[i], src[i]->Size);
532 isTemp[i] = GL_TRUE;
533
534 /* set RelAddr flag on src register */
535 srcRelAddr = *src[i];
536 srcRelAddr.RelAddr = GL_TRUE;
537 srcRelAddr.IsIndirect = GL_FALSE; /* not really needed */
538
539 /* MOV newSrc, srcRelAddr; */
540 inst = emit_instruction(emitInfo,
541 OPCODE_MOV,
542 &newSrc[i],
543 &srcRelAddr,
544 NULL,
545 NULL);
546
547 src[i] = &newSrc[i];
548 }
549 else {
550 /* just rewrite the src[i] storage to be ARL-relative */
551 newSrc[i] = *src[i];
552 newSrc[i].RelAddr = GL_TRUE;
553 newSrc[i].IsIndirect = GL_FALSE; /* not really needed */
554 src[i] = &newSrc[i];
555 }
556 }
557 }
558 }
559
560 /* Take special steps for indirect dest register write */
561 if (dst && dst->IsIndirect) {
562 /* load the ARL register with the indirect register */
563 emit_arl_load(emitInfo,
564 dst->IndirectFile,
565 dst->IndirectIndex,
566 dst->IndirectSwizzle);
567 newDst = *dst;
568 newDst.RelAddr = GL_TRUE;
569 newDst.IsIndirect = GL_FALSE;
570 dst = &newDst;
571 }
572
573 /* OK, emit the instruction and its dst, src regs */
574 inst = new_instruction(emitInfo, opcode);
575 if (!inst)
576 return NULL;
577
578 if (dst)
579 storage_to_dst_reg(&inst->DstReg, dst);
580
581 for (i = 0; i < 3; i++) {
582 if (src[i])
583 storage_to_src_reg(&inst->SrcReg[i], src[i]);
584 }
585
586 /* Free any temp registers that we allocated above */
587 for (i = 0; i < 3; i++) {
588 if (isTemp[i])
589 _slang_free_temp(emitInfo->vt, &newSrc[i]);
590 }
591
592 return inst;
593 }
594
595
596
597 /**
598 * Put a comment on the given instruction.
599 */
600 static void
601 inst_comment(struct prog_instruction *inst, const char *comment)
602 {
603 if (inst)
604 inst->Comment = _mesa_strdup(comment);
605 }
606
607
608
609 /**
610 * Return pointer to last instruction in program.
611 */
612 static struct prog_instruction *
613 prev_instruction(slang_emit_info *emitInfo)
614 {
615 struct gl_program *prog = emitInfo->prog;
616 if (prog->NumInstructions == 0)
617 return NULL;
618 else
619 return prog->Instructions + prog->NumInstructions - 1;
620 }
621
622
623 static struct prog_instruction *
624 emit(slang_emit_info *emitInfo, slang_ir_node *n);
625
626
627 /**
628 * Return an annotation string for given node's storage.
629 */
630 static char *
631 storage_annotation(const slang_ir_node *n, const struct gl_program *prog)
632 {
633 #if ANNOTATE
634 const slang_ir_storage *st = n->Store;
635 static char s[100] = "";
636
637 if (!st)
638 return _mesa_strdup("");
639
640 switch (st->File) {
641 case PROGRAM_CONSTANT:
642 if (st->Index >= 0) {
643 const GLfloat *val = prog->Parameters->ParameterValues[st->Index];
644 if (st->Swizzle == SWIZZLE_NOOP)
645 sprintf(s, "{%g, %g, %g, %g}", val[0], val[1], val[2], val[3]);
646 else {
647 sprintf(s, "%g", val[GET_SWZ(st->Swizzle, 0)]);
648 }
649 }
650 break;
651 case PROGRAM_TEMPORARY:
652 if (n->Var)
653 sprintf(s, "%s", (char *) n->Var->a_name);
654 else
655 sprintf(s, "t[%d]", st->Index);
656 break;
657 case PROGRAM_STATE_VAR:
658 case PROGRAM_UNIFORM:
659 sprintf(s, "%s", prog->Parameters->Parameters[st->Index].Name);
660 break;
661 case PROGRAM_VARYING:
662 sprintf(s, "%s", prog->Varying->Parameters[st->Index].Name);
663 break;
664 case PROGRAM_INPUT:
665 sprintf(s, "input[%d]", st->Index);
666 break;
667 case PROGRAM_OUTPUT:
668 sprintf(s, "output[%d]", st->Index);
669 break;
670 default:
671 s[0] = 0;
672 }
673 return _mesa_strdup(s);
674 #else
675 return NULL;
676 #endif
677 }
678
679
680 /**
681 * Return an annotation string for an instruction.
682 */
683 static char *
684 instruction_annotation(gl_inst_opcode opcode, char *dstAnnot,
685 char *srcAnnot0, char *srcAnnot1, char *srcAnnot2)
686 {
687 #if ANNOTATE
688 const char *operator;
689 char *s;
690 int len = 50;
691
692 if (dstAnnot)
693 len += strlen(dstAnnot);
694 else
695 dstAnnot = _mesa_strdup("");
696
697 if (srcAnnot0)
698 len += strlen(srcAnnot0);
699 else
700 srcAnnot0 = _mesa_strdup("");
701
702 if (srcAnnot1)
703 len += strlen(srcAnnot1);
704 else
705 srcAnnot1 = _mesa_strdup("");
706
707 if (srcAnnot2)
708 len += strlen(srcAnnot2);
709 else
710 srcAnnot2 = _mesa_strdup("");
711
712 switch (opcode) {
713 case OPCODE_ADD:
714 operator = "+";
715 break;
716 case OPCODE_SUB:
717 operator = "-";
718 break;
719 case OPCODE_MUL:
720 operator = "*";
721 break;
722 case OPCODE_DP2:
723 operator = "DP2";
724 break;
725 case OPCODE_DP3:
726 operator = "DP3";
727 break;
728 case OPCODE_DP4:
729 operator = "DP4";
730 break;
731 case OPCODE_XPD:
732 operator = "XPD";
733 break;
734 case OPCODE_RSQ:
735 operator = "RSQ";
736 break;
737 case OPCODE_SGT:
738 operator = ">";
739 break;
740 default:
741 operator = ",";
742 }
743
744 s = (char *) malloc(len);
745 sprintf(s, "%s = %s %s %s %s", dstAnnot,
746 srcAnnot0, operator, srcAnnot1, srcAnnot2);
747 assert(_mesa_strlen(s) < len);
748
749 free(dstAnnot);
750 free(srcAnnot0);
751 free(srcAnnot1);
752 free(srcAnnot2);
753
754 return s;
755 #else
756 return NULL;
757 #endif
758 }
759
760
761 /**
762 * Emit an instruction that's just a comment.
763 */
764 static struct prog_instruction *
765 emit_comment(slang_emit_info *emitInfo, const char *comment)
766 {
767 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_NOP);
768 inst_comment(inst, comment);
769 return inst;
770 }
771
772
773 /**
774 * Generate code for a simple arithmetic instruction.
775 * Either 1, 2 or 3 operands.
776 */
777 static struct prog_instruction *
778 emit_arith(slang_emit_info *emitInfo, slang_ir_node *n)
779 {
780 const slang_ir_info *info = _slang_ir_info(n->Opcode);
781 struct prog_instruction *inst;
782 GLuint i;
783
784 assert(info);
785 assert(info->InstOpcode != OPCODE_NOP);
786
787 #if PEEPHOLE_OPTIMIZATIONS
788 /* Look for MAD opportunity */
789 if (info->NumParams == 2 &&
790 n->Opcode == IR_ADD && n->Children[0]->Opcode == IR_MUL) {
791 /* found pattern IR_ADD(IR_MUL(A, B), C) */
792 emit(emitInfo, n->Children[0]->Children[0]); /* A */
793 emit(emitInfo, n->Children[0]->Children[1]); /* B */
794 emit(emitInfo, n->Children[1]); /* C */
795 alloc_node_storage(emitInfo, n, -1); /* dest */
796
797 inst = emit_instruction(emitInfo,
798 OPCODE_MAD,
799 n->Store,
800 n->Children[0]->Children[0]->Store,
801 n->Children[0]->Children[1]->Store,
802 n->Children[1]->Store);
803
804 free_node_storage(emitInfo->vt, n->Children[0]->Children[0]);
805 free_node_storage(emitInfo->vt, n->Children[0]->Children[1]);
806 free_node_storage(emitInfo->vt, n->Children[1]);
807 return inst;
808 }
809
810 if (info->NumParams == 2 &&
811 n->Opcode == IR_ADD && n->Children[1]->Opcode == IR_MUL) {
812 /* found pattern IR_ADD(A, IR_MUL(B, C)) */
813 emit(emitInfo, n->Children[0]); /* A */
814 emit(emitInfo, n->Children[1]->Children[0]); /* B */
815 emit(emitInfo, n->Children[1]->Children[1]); /* C */
816 alloc_node_storage(emitInfo, n, -1); /* dest */
817
818 inst = emit_instruction(emitInfo,
819 OPCODE_MAD,
820 n->Store,
821 n->Children[1]->Children[0]->Store,
822 n->Children[1]->Children[1]->Store,
823 n->Children[0]->Store);
824
825 free_node_storage(emitInfo->vt, n->Children[1]->Children[0]);
826 free_node_storage(emitInfo->vt, n->Children[1]->Children[1]);
827 free_node_storage(emitInfo->vt, n->Children[0]);
828 return inst;
829 }
830 #endif
831
832 /* gen code for children, may involve temp allocation */
833 for (i = 0; i < info->NumParams; i++) {
834 emit(emitInfo, n->Children[i]);
835 if (!n->Children[i] || !n->Children[i]->Store) {
836 /* error recovery */
837 return NULL;
838 }
839 }
840
841 /* result storage */
842 alloc_node_storage(emitInfo, n, -1);
843
844 inst = emit_instruction(emitInfo,
845 info->InstOpcode,
846 n->Store, /* dest */
847 (info->NumParams > 0 ? n->Children[0]->Store : NULL),
848 (info->NumParams > 1 ? n->Children[1]->Store : NULL),
849 (info->NumParams > 2 ? n->Children[2]->Store : NULL)
850 );
851
852 /* free temps */
853 for (i = 0; i < info->NumParams; i++)
854 free_node_storage(emitInfo->vt, n->Children[i]);
855
856 return inst;
857 }
858
859
860 /**
861 * Emit code for == and != operators. These could normally be handled
862 * by emit_arith() except we need to be able to handle structure comparisons.
863 */
864 static struct prog_instruction *
865 emit_compare(slang_emit_info *emitInfo, slang_ir_node *n)
866 {
867 struct prog_instruction *inst = NULL;
868 GLint size;
869
870 assert(n->Opcode == IR_EQUAL || n->Opcode == IR_NOTEQUAL);
871
872 /* gen code for children */
873 emit(emitInfo, n->Children[0]);
874 emit(emitInfo, n->Children[1]);
875
876 if (n->Children[0]->Store->Size != n->Children[1]->Store->Size) {
877 /* XXX this error should have been caught in slang_codegen.c */
878 slang_info_log_error(emitInfo->log, "invalid operands to == or !=");
879 n->Store = NULL;
880 return NULL;
881 }
882
883 /* final result is 1 bool */
884 if (!alloc_node_storage(emitInfo, n, 1))
885 return NULL;
886
887 size = n->Children[0]->Store->Size;
888
889 if (size == 1) {
890 gl_inst_opcode opcode = n->Opcode == IR_EQUAL ? OPCODE_SEQ : OPCODE_SNE;
891 inst = emit_instruction(emitInfo,
892 opcode,
893 n->Store, /* dest */
894 n->Children[0]->Store,
895 n->Children[1]->Store,
896 NULL);
897 }
898 else if (size <= 4) {
899 /* compare two vectors.
900 * Unfortunately, there's no instruction to compare vectors and
901 * return a scalar result. Do it with some compare and dot product
902 * instructions...
903 */
904 GLuint swizzle;
905 gl_inst_opcode dotOp;
906 slang_ir_storage tempStore;
907
908 if (!alloc_local_temp(emitInfo, &tempStore, 4)) {
909 n->Store = NULL;
910 return NULL;
911 /* out of temps */
912 }
913
914 if (size == 4) {
915 dotOp = OPCODE_DP4;
916 swizzle = SWIZZLE_XYZW;
917 }
918 else if (size == 3) {
919 dotOp = OPCODE_DP3;
920 swizzle = SWIZZLE_XYZW;
921 }
922 else {
923 assert(size == 2);
924 dotOp = OPCODE_DP3; /* XXX use OPCODE_DP2 eventually */
925 swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y);
926 }
927
928 /* Compute inequality (temp = (A != B)) */
929 inst = emit_instruction(emitInfo,
930 OPCODE_SNE,
931 &tempStore,
932 n->Children[0]->Store,
933 n->Children[1]->Store,
934 NULL);
935 inst_comment(inst, "Compare values");
936
937 /* Compute val = DOT(temp, temp) (reduction) */
938 inst = emit_instruction(emitInfo,
939 dotOp,
940 n->Store,
941 &tempStore,
942 &tempStore,
943 NULL);
944 inst->SrcReg[0].Swizzle = inst->SrcReg[1].Swizzle = swizzle; /*override*/
945 inst_comment(inst, "Reduce vec to bool");
946
947 _slang_free_temp(emitInfo->vt, &tempStore); /* free temp */
948
949 if (n->Opcode == IR_EQUAL) {
950 /* compute val = !val.x with SEQ val, val, 0; */
951 slang_ir_storage zero;
952 constant_to_storage(emitInfo, 0.0, &zero);
953 inst = emit_instruction(emitInfo,
954 OPCODE_SEQ,
955 n->Store, /* dest */
956 n->Store,
957 &zero,
958 NULL);
959 inst_comment(inst, "Invert true/false");
960 }
961 }
962 else {
963 /* size > 4, struct or array compare.
964 * XXX this won't work reliably for structs with padding!!
965 */
966 GLint i, num = (n->Children[0]->Store->Size + 3) / 4;
967 slang_ir_storage accTemp, sneTemp;
968
969 if (!alloc_local_temp(emitInfo, &accTemp, 4))
970 return NULL;
971
972 if (!alloc_local_temp(emitInfo, &sneTemp, 4))
973 return NULL;
974
975 for (i = 0; i < num; i++) {
976 slang_ir_storage srcStore0 = *n->Children[0]->Store;
977 slang_ir_storage srcStore1 = *n->Children[1]->Store;
978 srcStore0.Index += i;
979 srcStore1.Index += i;
980
981 if (i == 0) {
982 /* SNE accTemp, left[i], right[i] */
983 inst = emit_instruction(emitInfo, OPCODE_SNE,
984 &accTemp, /* dest */
985 &srcStore0,
986 &srcStore1,
987 NULL);
988 inst_comment(inst, "Begin struct/array comparison");
989 }
990 else {
991 /* SNE sneTemp, left[i], right[i] */
992 inst = emit_instruction(emitInfo, OPCODE_SNE,
993 &sneTemp, /* dest */
994 &srcStore0,
995 &srcStore1,
996 NULL);
997 /* ADD accTemp, accTemp, sneTemp; # like logical-OR */
998 inst = emit_instruction(emitInfo, OPCODE_ADD,
999 &accTemp, /* dest */
1000 &accTemp,
1001 &sneTemp,
1002 NULL);
1003 }
1004 }
1005
1006 /* compute accTemp.x || accTemp.y || accTemp.z || accTemp.w with DOT4 */
1007 inst = emit_instruction(emitInfo, OPCODE_DP4,
1008 n->Store,
1009 &accTemp,
1010 &accTemp,
1011 NULL);
1012 inst_comment(inst, "End struct/array comparison");
1013
1014 if (n->Opcode == IR_EQUAL) {
1015 /* compute tmp.x = !tmp.x via tmp.x = (tmp.x == 0) */
1016 slang_ir_storage zero;
1017 constant_to_storage(emitInfo, 0.0, &zero);
1018 inst = emit_instruction(emitInfo, OPCODE_SEQ,
1019 n->Store, /* dest */
1020 n->Store,
1021 &zero,
1022 NULL);
1023 inst_comment(inst, "Invert true/false");
1024 }
1025
1026 _slang_free_temp(emitInfo->vt, &accTemp);
1027 _slang_free_temp(emitInfo->vt, &sneTemp);
1028 }
1029
1030 /* free temps */
1031 free_node_storage(emitInfo->vt, n->Children[0]);
1032 free_node_storage(emitInfo->vt, n->Children[1]);
1033
1034 return inst;
1035 }
1036
1037
1038
1039 /**
1040 * Generate code for an IR_CLAMP instruction.
1041 */
1042 static struct prog_instruction *
1043 emit_clamp(slang_emit_info *emitInfo, slang_ir_node *n)
1044 {
1045 struct prog_instruction *inst;
1046 slang_ir_node tmpNode;
1047
1048 assert(n->Opcode == IR_CLAMP);
1049 /* ch[0] = value
1050 * ch[1] = min limit
1051 * ch[2] = max limit
1052 */
1053
1054 inst = emit(emitInfo, n->Children[0]);
1055
1056 /* If lower limit == 0.0 and upper limit == 1.0,
1057 * set prev instruction's SaturateMode field to SATURATE_ZERO_ONE.
1058 * Else,
1059 * emit OPCODE_MIN, OPCODE_MAX sequence.
1060 */
1061 #if 0
1062 /* XXX this isn't quite finished yet */
1063 if (n->Children[1]->Opcode == IR_FLOAT &&
1064 n->Children[1]->Value[0] == 0.0 &&
1065 n->Children[1]->Value[1] == 0.0 &&
1066 n->Children[1]->Value[2] == 0.0 &&
1067 n->Children[1]->Value[3] == 0.0 &&
1068 n->Children[2]->Opcode == IR_FLOAT &&
1069 n->Children[2]->Value[0] == 1.0 &&
1070 n->Children[2]->Value[1] == 1.0 &&
1071 n->Children[2]->Value[2] == 1.0 &&
1072 n->Children[2]->Value[3] == 1.0) {
1073 if (!inst) {
1074 inst = prev_instruction(prog);
1075 }
1076 if (inst && inst->Opcode != OPCODE_NOP) {
1077 /* and prev instruction's DstReg matches n->Children[0]->Store */
1078 inst->SaturateMode = SATURATE_ZERO_ONE;
1079 n->Store = n->Children[0]->Store;
1080 return inst;
1081 }
1082 }
1083 #endif
1084
1085 if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size))
1086 return NULL;
1087
1088 emit(emitInfo, n->Children[1]);
1089 emit(emitInfo, n->Children[2]);
1090
1091 /* Some GPUs don't allow reading from output registers. So if the
1092 * dest for this clamp() is an output reg, we can't use that reg for
1093 * the intermediate result. Use a temp register instead.
1094 */
1095 _mesa_bzero(&tmpNode, sizeof(tmpNode));
1096 alloc_node_storage(emitInfo, &tmpNode, n->Store->Size);
1097
1098 /* tmp = max(ch[0], ch[1]) */
1099 inst = emit_instruction(emitInfo, OPCODE_MAX,
1100 tmpNode.Store, /* dest */
1101 n->Children[0]->Store,
1102 n->Children[1]->Store,
1103 NULL);
1104
1105 /* n->dest = min(tmp, ch[2]) */
1106 inst = emit_instruction(emitInfo, OPCODE_MIN,
1107 n->Store, /* dest */
1108 tmpNode.Store,
1109 n->Children[2]->Store,
1110 NULL);
1111
1112 free_node_storage(emitInfo->vt, &tmpNode);
1113
1114 return inst;
1115 }
1116
1117
1118 static struct prog_instruction *
1119 emit_negation(slang_emit_info *emitInfo, slang_ir_node *n)
1120 {
1121 /* Implement as MOV dst, -src; */
1122 /* XXX we could look at the previous instruction and in some circumstances
1123 * modify it to accomplish the negation.
1124 */
1125 struct prog_instruction *inst;
1126
1127 emit(emitInfo, n->Children[0]);
1128
1129 if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size))
1130 return NULL;
1131
1132 inst = emit_instruction(emitInfo,
1133 OPCODE_MOV,
1134 n->Store, /* dest */
1135 n->Children[0]->Store,
1136 NULL,
1137 NULL);
1138 inst->SrcReg[0].Negate = NEGATE_XYZW;
1139 return inst;
1140 }
1141
1142
1143 static struct prog_instruction *
1144 emit_label(slang_emit_info *emitInfo, const slang_ir_node *n)
1145 {
1146 assert(n->Label);
1147 #if 0
1148 /* XXX this fails in loop tail code - investigate someday */
1149 assert(_slang_label_get_location(n->Label) < 0);
1150 _slang_label_set_location(n->Label, emitInfo->prog->NumInstructions,
1151 emitInfo->prog);
1152 #else
1153 if (_slang_label_get_location(n->Label) < 0)
1154 _slang_label_set_location(n->Label, emitInfo->prog->NumInstructions,
1155 emitInfo->prog);
1156 #endif
1157 return NULL;
1158 }
1159
1160
1161 /**
1162 * Emit code for a function call.
1163 * Note that for each time a function is called, we emit the function's
1164 * body code again because the set of available registers may be different.
1165 */
1166 static struct prog_instruction *
1167 emit_fcall(slang_emit_info *emitInfo, slang_ir_node *n)
1168 {
1169 struct gl_program *progSave;
1170 struct prog_instruction *inst;
1171 GLuint subroutineId;
1172 GLuint maxInstSave;
1173
1174 assert(n->Opcode == IR_CALL);
1175 assert(n->Label);
1176
1177 /* save/push cur program */
1178 maxInstSave = emitInfo->MaxInstructions;
1179 progSave = emitInfo->prog;
1180
1181 emitInfo->prog = new_subroutine(emitInfo, &subroutineId);
1182 emitInfo->MaxInstructions = emitInfo->prog->NumInstructions;
1183
1184 _slang_label_set_location(n->Label, emitInfo->prog->NumInstructions,
1185 emitInfo->prog);
1186
1187 if (emitInfo->EmitBeginEndSub) {
1188 /* BGNSUB isn't a real instruction.
1189 * We require a label (i.e. "foobar:") though, if we're going to
1190 * print the program in the NV format. The BNGSUB instruction is
1191 * really just a NOP to attach the label to.
1192 */
1193 inst = new_instruction(emitInfo, OPCODE_BGNSUB);
1194 inst_comment(inst, n->Label->Name);
1195 }
1196
1197 /* body of function: */
1198 emit(emitInfo, n->Children[0]);
1199 n->Store = n->Children[0]->Store;
1200
1201 /* add RET instruction now, if needed */
1202 inst = prev_instruction(emitInfo);
1203 if (inst && inst->Opcode != OPCODE_RET) {
1204 inst = new_instruction(emitInfo, OPCODE_RET);
1205 }
1206
1207 if (emitInfo->EmitBeginEndSub) {
1208 inst = new_instruction(emitInfo, OPCODE_ENDSUB);
1209 inst_comment(inst, n->Label->Name);
1210 }
1211
1212 /* pop/restore cur program */
1213 emitInfo->prog = progSave;
1214 emitInfo->MaxInstructions = maxInstSave;
1215
1216 /* emit the function call */
1217 inst = new_instruction(emitInfo, OPCODE_CAL);
1218 /* The branch target is just the subroutine number (changed later) */
1219 inst->BranchTarget = subroutineId;
1220 inst_comment(inst, n->Label->Name);
1221 assert(inst->BranchTarget >= 0);
1222
1223 return inst;
1224 }
1225
1226
1227 /**
1228 * Emit code for a 'return' statement.
1229 */
1230 static struct prog_instruction *
1231 emit_return(slang_emit_info *emitInfo, slang_ir_node *n)
1232 {
1233 struct prog_instruction *inst;
1234 assert(n);
1235 assert(n->Opcode == IR_RETURN);
1236 assert(n->Label);
1237 inst = new_instruction(emitInfo, OPCODE_RET);
1238 inst->DstReg.CondMask = COND_TR; /* always return */
1239 return inst;
1240 }
1241
1242
1243 static struct prog_instruction *
1244 emit_kill(slang_emit_info *emitInfo)
1245 {
1246 struct gl_fragment_program *fp;
1247 struct prog_instruction *inst;
1248 /* NV-KILL - discard fragment depending on condition code.
1249 * Note that ARB-KILL depends on sign of vector operand.
1250 */
1251 inst = new_instruction(emitInfo, OPCODE_KIL_NV);
1252 inst->DstReg.CondMask = COND_TR; /* always kill */
1253
1254 assert(emitInfo->prog->Target == GL_FRAGMENT_PROGRAM_ARB);
1255 fp = (struct gl_fragment_program *) emitInfo->prog;
1256 fp->UsesKill = GL_TRUE;
1257
1258 return inst;
1259 }
1260
1261
1262 static struct prog_instruction *
1263 emit_tex(slang_emit_info *emitInfo, slang_ir_node *n)
1264 {
1265 struct prog_instruction *inst;
1266 gl_inst_opcode opcode;
1267 GLboolean shadow = GL_FALSE;
1268
1269 switch (n->Opcode) {
1270 case IR_TEX:
1271 opcode = OPCODE_TEX;
1272 break;
1273 case IR_TEX_SH:
1274 opcode = OPCODE_TEX;
1275 shadow = GL_TRUE;
1276 break;
1277 case IR_TEXB:
1278 opcode = OPCODE_TXB;
1279 break;
1280 case IR_TEXB_SH:
1281 opcode = OPCODE_TXB;
1282 shadow = GL_TRUE;
1283 break;
1284 case IR_TEXP:
1285 opcode = OPCODE_TXP;
1286 break;
1287 case IR_TEXP_SH:
1288 opcode = OPCODE_TXP;
1289 shadow = GL_TRUE;
1290 break;
1291 default:
1292 _mesa_problem(NULL, "Bad IR TEX code");
1293 return NULL;
1294 }
1295
1296 if (n->Children[0]->Opcode == IR_ELEMENT) {
1297 /* array is the sampler (a uniform which'll indicate the texture unit) */
1298 assert(n->Children[0]->Children[0]->Store);
1299 assert(n->Children[0]->Children[0]->Store->File == PROGRAM_SAMPLER);
1300
1301 emit(emitInfo, n->Children[0]);
1302
1303 n->Children[0]->Var = n->Children[0]->Children[0]->Var;
1304 } else {
1305 /* this is the sampler (a uniform which'll indicate the texture unit) */
1306 assert(n->Children[0]->Store);
1307 assert(n->Children[0]->Store->File == PROGRAM_SAMPLER);
1308 }
1309
1310 /* emit code for the texcoord operand */
1311 (void) emit(emitInfo, n->Children[1]);
1312
1313 /* alloc storage for result of texture fetch */
1314 if (!alloc_node_storage(emitInfo, n, 4))
1315 return NULL;
1316
1317 /* emit TEX instruction; Child[1] is the texcoord */
1318 inst = emit_instruction(emitInfo,
1319 opcode,
1320 n->Store,
1321 n->Children[1]->Store,
1322 NULL,
1323 NULL);
1324
1325 inst->TexShadow = shadow;
1326
1327 /* Store->Index is the uniform/sampler index */
1328 assert(n->Children[0]->Store->Index >= 0);
1329 inst->TexSrcUnit = n->Children[0]->Store->Index;
1330 inst->TexSrcTarget = n->Children[0]->Store->TexTarget;
1331
1332 /* mark the sampler as being used */
1333 _mesa_use_uniform(emitInfo->prog->Parameters,
1334 (char *) n->Children[0]->Var->a_name);
1335
1336 return inst;
1337 }
1338
1339
1340 /**
1341 * Assignment/copy
1342 */
1343 static struct prog_instruction *
1344 emit_copy(slang_emit_info *emitInfo, slang_ir_node *n)
1345 {
1346 struct prog_instruction *inst;
1347
1348 assert(n->Opcode == IR_COPY);
1349
1350 /* lhs */
1351 emit(emitInfo, n->Children[0]);
1352 if (!n->Children[0]->Store || n->Children[0]->Store->Index < 0) {
1353 /* an error should have been already recorded */
1354 return NULL;
1355 }
1356
1357 /* rhs */
1358 assert(n->Children[1]);
1359 inst = emit(emitInfo, n->Children[1]);
1360
1361 if (!n->Children[1]->Store || n->Children[1]->Store->Index < 0) {
1362 if (!emitInfo->log->text && !emitInfo->UnresolvedFunctions) {
1363 /* XXX this error should have been caught in slang_codegen.c */
1364 slang_info_log_error(emitInfo->log, "invalid assignment");
1365 }
1366 return NULL;
1367 }
1368
1369 assert(n->Children[1]->Store->Index >= 0);
1370
1371 /*assert(n->Children[0]->Store->Size == n->Children[1]->Store->Size);*/
1372
1373 n->Store = n->Children[0]->Store;
1374
1375 if (n->Store->File == PROGRAM_SAMPLER) {
1376 /* no code generated for sampler assignments,
1377 * just copy the sampler index/target at compile time.
1378 */
1379 n->Store->Index = n->Children[1]->Store->Index;
1380 n->Store->TexTarget = n->Children[1]->Store->TexTarget;
1381 return NULL;
1382 }
1383
1384 #if PEEPHOLE_OPTIMIZATIONS
1385 if (inst &&
1386 (n->Children[1]->Opcode != IR_SWIZZLE) &&
1387 _slang_is_temp(emitInfo->vt, n->Children[1]->Store) &&
1388 (inst->DstReg.File == n->Children[1]->Store->File) &&
1389 (inst->DstReg.Index == n->Children[1]->Store->Index) &&
1390 !n->Children[0]->Store->IsIndirect &&
1391 n->Children[0]->Store->Size <= 4) {
1392 /* Peephole optimization:
1393 * The Right-Hand-Side has its results in a temporary place.
1394 * Modify the RHS (and the prev instruction) to store its results
1395 * in the destination specified by n->Children[0].
1396 * Then, this MOVE is a no-op.
1397 * Ex:
1398 * MUL tmp, x, y;
1399 * MOV a, tmp;
1400 * becomes:
1401 * MUL a, x, y;
1402 */
1403
1404 /* fixup the previous instruction (which stored the RHS result) */
1405 assert(n->Children[0]->Store->Index >= 0);
1406 storage_to_dst_reg(&inst->DstReg, n->Children[0]->Store);
1407 return inst;
1408 }
1409 else
1410 #endif
1411 {
1412 if (n->Children[0]->Store->Size > 4) {
1413 /* move matrix/struct etc (block of registers) */
1414 slang_ir_storage dstStore = *n->Children[0]->Store;
1415 slang_ir_storage srcStore = *n->Children[1]->Store;
1416 GLint size = srcStore.Size;
1417 ASSERT(n->Children[1]->Store->Swizzle == SWIZZLE_NOOP);
1418 dstStore.Size = 4;
1419 srcStore.Size = 4;
1420 while (size >= 4) {
1421 inst = emit_instruction(emitInfo, OPCODE_MOV,
1422 &dstStore,
1423 &srcStore,
1424 NULL,
1425 NULL);
1426 inst_comment(inst, "IR_COPY block");
1427 srcStore.Index++;
1428 dstStore.Index++;
1429 size -= 4;
1430 }
1431 }
1432 else {
1433 /* single register move */
1434 char *srcAnnot, *dstAnnot;
1435 assert(n->Children[0]->Store->Index >= 0);
1436 inst = emit_instruction(emitInfo, OPCODE_MOV,
1437 n->Children[0]->Store, /* dest */
1438 n->Children[1]->Store,
1439 NULL,
1440 NULL);
1441 dstAnnot = storage_annotation(n->Children[0], emitInfo->prog);
1442 srcAnnot = storage_annotation(n->Children[1], emitInfo->prog);
1443 inst->Comment = instruction_annotation(inst->Opcode, dstAnnot,
1444 srcAnnot, NULL, NULL);
1445 }
1446 free_node_storage(emitInfo->vt, n->Children[1]);
1447 return inst;
1448 }
1449 }
1450
1451
1452 /**
1453 * An IR_COND node wraps a boolean expression which is used by an
1454 * IF or WHILE test. This is where we'll set condition codes, if needed.
1455 */
1456 static struct prog_instruction *
1457 emit_cond(slang_emit_info *emitInfo, slang_ir_node *n)
1458 {
1459 struct prog_instruction *inst;
1460
1461 assert(n->Opcode == IR_COND);
1462
1463 if (!n->Children[0])
1464 return NULL;
1465
1466 /* emit code for the expression */
1467 inst = emit(emitInfo, n->Children[0]);
1468
1469 if (!n->Children[0]->Store) {
1470 /* error recovery */
1471 return NULL;
1472 }
1473
1474 assert(n->Children[0]->Store);
1475 /*assert(n->Children[0]->Store->Size == 1);*/
1476
1477 if (emitInfo->EmitCondCodes) {
1478 if (inst &&
1479 n->Children[0]->Store &&
1480 inst->DstReg.File == n->Children[0]->Store->File &&
1481 inst->DstReg.Index == n->Children[0]->Store->Index) {
1482 /* The previous instruction wrote to the register who's value
1483 * we're testing. Just fix that instruction so that the
1484 * condition codes are computed.
1485 */
1486 inst->CondUpdate = GL_TRUE;
1487 n->Store = n->Children[0]->Store;
1488 return inst;
1489 }
1490 else {
1491 /* This'll happen for things like "if (i) ..." where no code
1492 * is normally generated for the expression "i".
1493 * Generate a move instruction just to set condition codes.
1494 */
1495 if (!alloc_node_storage(emitInfo, n, 1))
1496 return NULL;
1497 inst = emit_instruction(emitInfo, OPCODE_MOV,
1498 n->Store, /* dest */
1499 n->Children[0]->Store,
1500 NULL,
1501 NULL);
1502 inst->CondUpdate = GL_TRUE;
1503 inst_comment(inst, "COND expr");
1504 _slang_free_temp(emitInfo->vt, n->Store);
1505 return inst;
1506 }
1507 }
1508 else {
1509 /* No-op: the boolean result of the expression is in a regular reg */
1510 n->Store = n->Children[0]->Store;
1511 return inst;
1512 }
1513 }
1514
1515
1516 /**
1517 * Logical-NOT
1518 */
1519 static struct prog_instruction *
1520 emit_not(slang_emit_info *emitInfo, slang_ir_node *n)
1521 {
1522 static const struct {
1523 gl_inst_opcode op, opNot;
1524 } operators[] = {
1525 { OPCODE_SLT, OPCODE_SGE },
1526 { OPCODE_SLE, OPCODE_SGT },
1527 { OPCODE_SGT, OPCODE_SLE },
1528 { OPCODE_SGE, OPCODE_SLT },
1529 { OPCODE_SEQ, OPCODE_SNE },
1530 { OPCODE_SNE, OPCODE_SEQ },
1531 { 0, 0 }
1532 };
1533 struct prog_instruction *inst;
1534 slang_ir_storage zero;
1535 GLuint i;
1536
1537 /* child expr */
1538 inst = emit(emitInfo, n->Children[0]);
1539
1540 #if PEEPHOLE_OPTIMIZATIONS
1541 if (inst) {
1542 /* if the prev instruction was a comparison instruction, invert it */
1543 for (i = 0; operators[i].op; i++) {
1544 if (inst->Opcode == operators[i].op) {
1545 inst->Opcode = operators[i].opNot;
1546 n->Store = n->Children[0]->Store;
1547 return inst;
1548 }
1549 }
1550 }
1551 #endif
1552
1553 /* else, invert using SEQ (v = v == 0) */
1554 if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size))
1555 return NULL;
1556
1557 constant_to_storage(emitInfo, 0.0, &zero);
1558 inst = emit_instruction(emitInfo,
1559 OPCODE_SEQ,
1560 n->Store,
1561 n->Children[0]->Store,
1562 &zero,
1563 NULL);
1564 inst_comment(inst, "NOT");
1565
1566 free_node_storage(emitInfo->vt, n->Children[0]);
1567
1568 return inst;
1569 }
1570
1571
1572 static struct prog_instruction *
1573 emit_if(slang_emit_info *emitInfo, slang_ir_node *n)
1574 {
1575 struct gl_program *prog = emitInfo->prog;
1576 GLuint ifInstLoc, elseInstLoc = 0;
1577 GLuint condWritemask = 0;
1578
1579 /* emit condition expression code */
1580 {
1581 struct prog_instruction *inst;
1582 inst = emit(emitInfo, n->Children[0]);
1583 if (emitInfo->EmitCondCodes) {
1584 if (!inst) {
1585 /* error recovery */
1586 return NULL;
1587 }
1588 condWritemask = inst->DstReg.WriteMask;
1589 }
1590 }
1591
1592 if (!n->Children[0]->Store)
1593 return NULL;
1594
1595 #if 0
1596 assert(n->Children[0]->Store->Size == 1); /* a bool! */
1597 #endif
1598
1599 ifInstLoc = prog->NumInstructions;
1600 if (emitInfo->EmitHighLevelInstructions) {
1601 if (emitInfo->EmitCondCodes) {
1602 /* IF condcode THEN ... */
1603 struct prog_instruction *ifInst;
1604 ifInst = new_instruction(emitInfo, OPCODE_IF);
1605 ifInst->DstReg.CondMask = COND_NE; /* if cond is non-zero */
1606 /* only test the cond code (1 of 4) that was updated by the
1607 * previous instruction.
1608 */
1609 ifInst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1610 }
1611 else {
1612 /* IF src[0] THEN ... */
1613 emit_instruction(emitInfo, OPCODE_IF,
1614 NULL, /* dst */
1615 n->Children[0]->Store, /* op0 */
1616 NULL,
1617 NULL);
1618 }
1619 }
1620 else {
1621 /* conditional jump to else, or endif */
1622 struct prog_instruction *ifInst = new_instruction(emitInfo, OPCODE_BRA);
1623 ifInst->DstReg.CondMask = COND_EQ; /* BRA if cond is zero */
1624 inst_comment(ifInst, "if zero");
1625 ifInst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1626 }
1627
1628 /* if body */
1629 emit(emitInfo, n->Children[1]);
1630
1631 if (n->Children[2]) {
1632 /* have else body */
1633 elseInstLoc = prog->NumInstructions;
1634 if (emitInfo->EmitHighLevelInstructions) {
1635 (void) new_instruction(emitInfo, OPCODE_ELSE);
1636 }
1637 else {
1638 /* jump to endif instruction */
1639 struct prog_instruction *inst;
1640 inst = new_instruction(emitInfo, OPCODE_BRA);
1641 inst_comment(inst, "else");
1642 inst->DstReg.CondMask = COND_TR; /* always branch */
1643 }
1644 prog->Instructions[ifInstLoc].BranchTarget = prog->NumInstructions;
1645 emit(emitInfo, n->Children[2]);
1646 }
1647 else {
1648 /* no else body */
1649 prog->Instructions[ifInstLoc].BranchTarget = prog->NumInstructions;
1650 }
1651
1652 if (emitInfo->EmitHighLevelInstructions) {
1653 (void) new_instruction(emitInfo, OPCODE_ENDIF);
1654 }
1655
1656 if (n->Children[2]) {
1657 prog->Instructions[elseInstLoc].BranchTarget = prog->NumInstructions;
1658 }
1659 return NULL;
1660 }
1661
1662
1663 static struct prog_instruction *
1664 emit_loop(slang_emit_info *emitInfo, slang_ir_node *n)
1665 {
1666 struct gl_program *prog = emitInfo->prog;
1667 struct prog_instruction *endInst;
1668 GLuint beginInstLoc, tailInstLoc, endInstLoc;
1669 slang_ir_node *ir;
1670
1671 /* emit OPCODE_BGNLOOP */
1672 beginInstLoc = prog->NumInstructions;
1673 if (emitInfo->EmitHighLevelInstructions) {
1674 (void) new_instruction(emitInfo, OPCODE_BGNLOOP);
1675 }
1676
1677 /* body */
1678 emit(emitInfo, n->Children[0]);
1679
1680 /* tail */
1681 tailInstLoc = prog->NumInstructions;
1682 if (n->Children[1]) {
1683 if (emitInfo->EmitComments)
1684 emit_comment(emitInfo, "Loop tail code:");
1685 emit(emitInfo, n->Children[1]);
1686 }
1687
1688 endInstLoc = prog->NumInstructions;
1689 if (emitInfo->EmitHighLevelInstructions) {
1690 /* emit OPCODE_ENDLOOP */
1691 endInst = new_instruction(emitInfo, OPCODE_ENDLOOP);
1692 }
1693 else {
1694 /* emit unconditional BRA-nch */
1695 endInst = new_instruction(emitInfo, OPCODE_BRA);
1696 endInst->DstReg.CondMask = COND_TR; /* always true */
1697 }
1698 /* ENDLOOP's BranchTarget points to the BGNLOOP inst */
1699 endInst->BranchTarget = beginInstLoc;
1700
1701 if (emitInfo->EmitHighLevelInstructions) {
1702 /* BGNLOOP's BranchTarget points to the ENDLOOP inst */
1703 prog->Instructions[beginInstLoc].BranchTarget = prog->NumInstructions -1;
1704 }
1705
1706 /* Done emitting loop code. Now walk over the loop's linked list of
1707 * BREAK and CONT nodes, filling in their BranchTarget fields (which
1708 * will point to the ENDLOOP+1 or BGNLOOP instructions, respectively).
1709 */
1710 for (ir = n->List; ir; ir = ir->List) {
1711 struct prog_instruction *inst = prog->Instructions + ir->InstLocation;
1712 assert(inst->BranchTarget < 0);
1713 if (ir->Opcode == IR_BREAK ||
1714 ir->Opcode == IR_BREAK_IF_TRUE) {
1715 assert(inst->Opcode == OPCODE_BRK ||
1716 inst->Opcode == OPCODE_BRA);
1717 /* go to instruction after end of loop */
1718 inst->BranchTarget = endInstLoc + 1;
1719 }
1720 else {
1721 assert(ir->Opcode == IR_CONT ||
1722 ir->Opcode == IR_CONT_IF_TRUE);
1723 assert(inst->Opcode == OPCODE_CONT ||
1724 inst->Opcode == OPCODE_BRA);
1725 /* go to instruction at tail of loop */
1726 inst->BranchTarget = endInstLoc;
1727 }
1728 }
1729 return NULL;
1730 }
1731
1732
1733 /**
1734 * Unconditional "continue" or "break" statement.
1735 * Either OPCODE_CONT, OPCODE_BRK or OPCODE_BRA will be emitted.
1736 */
1737 static struct prog_instruction *
1738 emit_cont_break(slang_emit_info *emitInfo, slang_ir_node *n)
1739 {
1740 gl_inst_opcode opcode;
1741 struct prog_instruction *inst;
1742
1743 if (n->Opcode == IR_CONT) {
1744 /* we need to execute the loop's tail code before doing CONT */
1745 assert(n->Parent);
1746 assert(n->Parent->Opcode == IR_LOOP);
1747 if (n->Parent->Children[1]) {
1748 /* emit tail code */
1749 if (emitInfo->EmitComments) {
1750 emit_comment(emitInfo, "continue - tail code:");
1751 }
1752 emit(emitInfo, n->Parent->Children[1]);
1753 }
1754 }
1755
1756 /* opcode selection */
1757 if (emitInfo->EmitHighLevelInstructions) {
1758 opcode = (n->Opcode == IR_CONT) ? OPCODE_CONT : OPCODE_BRK;
1759 }
1760 else {
1761 opcode = OPCODE_BRA;
1762 }
1763 n->InstLocation = emitInfo->prog->NumInstructions;
1764 inst = new_instruction(emitInfo, opcode);
1765 inst->DstReg.CondMask = COND_TR; /* always true */
1766 return inst;
1767 }
1768
1769
1770 /**
1771 * Conditional "continue" or "break" statement.
1772 * Either OPCODE_CONT, OPCODE_BRK or OPCODE_BRA will be emitted.
1773 */
1774 static struct prog_instruction *
1775 emit_cont_break_if_true(slang_emit_info *emitInfo, slang_ir_node *n)
1776 {
1777 struct prog_instruction *inst;
1778
1779 assert(n->Opcode == IR_CONT_IF_TRUE ||
1780 n->Opcode == IR_BREAK_IF_TRUE);
1781
1782 /* evaluate condition expr, setting cond codes */
1783 inst = emit(emitInfo, n->Children[0]);
1784 if (emitInfo->EmitCondCodes) {
1785 assert(inst);
1786 inst->CondUpdate = GL_TRUE;
1787 }
1788
1789 n->InstLocation = emitInfo->prog->NumInstructions;
1790
1791 /* opcode selection */
1792 if (emitInfo->EmitHighLevelInstructions) {
1793 const gl_inst_opcode opcode
1794 = (n->Opcode == IR_CONT_IF_TRUE) ? OPCODE_CONT : OPCODE_BRK;
1795 if (emitInfo->EmitCondCodes) {
1796 /* Get the writemask from the previous instruction which set
1797 * the condcodes. Use that writemask as the CondSwizzle.
1798 */
1799 const GLuint condWritemask = inst->DstReg.WriteMask;
1800 inst = new_instruction(emitInfo, opcode);
1801 inst->DstReg.CondMask = COND_NE;
1802 inst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1803 return inst;
1804 }
1805 else {
1806 /* IF reg
1807 * BRK/CONT;
1808 * ENDIF
1809 */
1810 GLint ifInstLoc;
1811 ifInstLoc = emitInfo->prog->NumInstructions;
1812 inst = emit_instruction(emitInfo, OPCODE_IF,
1813 NULL, /* dest */
1814 n->Children[0]->Store,
1815 NULL,
1816 NULL);
1817 n->InstLocation = emitInfo->prog->NumInstructions;
1818
1819 inst = new_instruction(emitInfo, opcode);
1820 inst = new_instruction(emitInfo, OPCODE_ENDIF);
1821
1822 emitInfo->prog->Instructions[ifInstLoc].BranchTarget
1823 = emitInfo->prog->NumInstructions;
1824 return inst;
1825 }
1826 }
1827 else {
1828 const GLuint condWritemask = inst->DstReg.WriteMask;
1829 assert(emitInfo->EmitCondCodes);
1830 inst = new_instruction(emitInfo, OPCODE_BRA);
1831 inst->DstReg.CondMask = COND_NE;
1832 inst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1833 return inst;
1834 }
1835 }
1836
1837
1838 /**
1839 * Return the size of a swizzle mask given that some swizzle components
1840 * may be NIL/undefined. For example:
1841 * swizzle_size(".zzxx") = 4
1842 * swizzle_size(".xy??") = 2
1843 * swizzle_size(".w???") = 1
1844 */
1845 static GLuint
1846 swizzle_size(GLuint swizzle)
1847 {
1848 GLuint i;
1849 for (i = 0; i < 4; i++) {
1850 if (GET_SWZ(swizzle, i) == SWIZZLE_NIL)
1851 return i;
1852 }
1853 return 4;
1854 }
1855
1856
1857 static struct prog_instruction *
1858 emit_swizzle(slang_emit_info *emitInfo, slang_ir_node *n)
1859 {
1860 struct prog_instruction *inst;
1861
1862 inst = emit(emitInfo, n->Children[0]);
1863
1864 if (!n->Store->Parent) {
1865 /* this covers a case such as "(b ? p : q).x" */
1866 n->Store->Parent = n->Children[0]->Store;
1867 assert(n->Store->Parent);
1868 }
1869
1870 {
1871 const GLuint swizzle = n->Store->Swizzle;
1872 /* new storage is parent storage with updated Swizzle + Size fields */
1873 _slang_copy_ir_storage(n->Store, n->Store->Parent);
1874 /* Apply this node's swizzle to parent's storage */
1875 n->Store->Swizzle = _slang_swizzle_swizzle(n->Store->Swizzle, swizzle);
1876 /* Update size */
1877 n->Store->Size = swizzle_size(n->Store->Swizzle);
1878 }
1879
1880 assert(!n->Store->Parent);
1881 assert(n->Store->Index >= 0);
1882
1883 return inst;
1884 }
1885
1886
1887 /**
1888 * Dereference array element: element == array[index]
1889 * This basically involves emitting code for computing the array index
1890 * and updating the node/element's storage info.
1891 */
1892 static struct prog_instruction *
1893 emit_array_element(slang_emit_info *emitInfo, slang_ir_node *n)
1894 {
1895 slang_ir_storage *arrayStore, *indexStore;
1896 const int elemSize = n->Store->Size; /* number of floats */
1897 const GLint elemSizeVec = (elemSize + 3) / 4; /* number of vec4 */
1898 struct prog_instruction *inst;
1899
1900 assert(n->Opcode == IR_ELEMENT);
1901 assert(elemSize > 0);
1902
1903 /* special case for built-in state variables, like light state */
1904 {
1905 slang_ir_storage *root = n->Store;
1906 assert(!root->Parent);
1907 while (root->Parent)
1908 root = root->Parent;
1909
1910 if (root->File == PROGRAM_STATE_VAR) {
1911 GLboolean direct;
1912 GLint index =
1913 _slang_alloc_statevar(n, emitInfo->prog->Parameters, &direct);
1914 if (index < 0) {
1915 /* error */
1916 return NULL;
1917 }
1918 if (direct) {
1919 n->Store->Index = index;
1920 return NULL; /* all done */
1921 }
1922 }
1923 }
1924
1925 /* do codegen for array itself */
1926 emit(emitInfo, n->Children[0]);
1927 arrayStore = n->Children[0]->Store;
1928
1929 /* The initial array element storage is the array's storage,
1930 * then modified below.
1931 */
1932 _slang_copy_ir_storage(n->Store, arrayStore);
1933
1934
1935 if (n->Children[1]->Opcode == IR_FLOAT) {
1936 /* Constant array index */
1937 const GLint element = (GLint) n->Children[1]->Value[0];
1938
1939 /* this element's storage is the array's storage, plus constant offset */
1940 n->Store->Index += elemSizeVec * element;
1941 }
1942 else {
1943 /* Variable array index */
1944
1945 /* do codegen for array index expression */
1946 emit(emitInfo, n->Children[1]);
1947 indexStore = n->Children[1]->Store;
1948
1949 if (indexStore->IsIndirect) {
1950 /* need to put the array index into a temporary since we can't
1951 * directly support a[b[i]] constructs.
1952 */
1953
1954
1955 /*indexStore = tempstore();*/
1956 }
1957
1958
1959 if (elemSize > 4) {
1960 /* need to multiply array index by array element size */
1961 struct prog_instruction *inst;
1962 slang_ir_storage *indexTemp;
1963 slang_ir_storage elemSizeStore;
1964
1965 /* allocate 1 float indexTemp */
1966 indexTemp = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, 1);
1967 _slang_alloc_temp(emitInfo->vt, indexTemp);
1968
1969 /* allocate a constant containing the element size */
1970 constant_to_storage(emitInfo, (float) elemSizeVec, &elemSizeStore);
1971
1972 /* multiply array index by element size */
1973 inst = emit_instruction(emitInfo,
1974 OPCODE_MUL,
1975 indexTemp, /* dest */
1976 indexStore, /* the index */
1977 &elemSizeStore,
1978 NULL);
1979
1980 indexStore = indexTemp;
1981 }
1982
1983 if (arrayStore->IsIndirect) {
1984 /* ex: in a[i][j], a[i] (the arrayStore) is indirect */
1985 /* Need to add indexStore to arrayStore->Indirect store */
1986 slang_ir_storage indirectArray;
1987 slang_ir_storage *indexTemp;
1988
1989 _slang_init_ir_storage(&indirectArray,
1990 arrayStore->IndirectFile,
1991 arrayStore->IndirectIndex,
1992 1,
1993 arrayStore->IndirectSwizzle);
1994
1995 /* allocate 1 float indexTemp */
1996 indexTemp = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, 1);
1997 _slang_alloc_temp(emitInfo->vt, indexTemp);
1998
1999 inst = emit_instruction(emitInfo,
2000 OPCODE_ADD,
2001 indexTemp, /* dest */
2002 indexStore, /* the index */
2003 &indirectArray, /* indirect array base */
2004 NULL);
2005
2006 indexStore = indexTemp;
2007 }
2008
2009 /* update the array element storage info */
2010 n->Store->IsIndirect = GL_TRUE;
2011 n->Store->IndirectFile = indexStore->File;
2012 n->Store->IndirectIndex = indexStore->Index;
2013 n->Store->IndirectSwizzle = indexStore->Swizzle;
2014 }
2015
2016 n->Store->Size = elemSize;
2017 n->Store->Swizzle = _slang_var_swizzle(elemSize, 0);
2018
2019 return NULL; /* no instruction */
2020 }
2021
2022
2023 /**
2024 * Resolve storage for accessing a structure field.
2025 */
2026 static struct prog_instruction *
2027 emit_struct_field(slang_emit_info *emitInfo, slang_ir_node *n)
2028 {
2029 slang_ir_storage *root = n->Store;
2030 GLint fieldOffset, fieldSize;
2031
2032 assert(n->Opcode == IR_FIELD);
2033
2034 assert(!root->Parent);
2035 while (root->Parent)
2036 root = root->Parent;
2037
2038 /* If this is the field of a state var, allocate constant/uniform
2039 * storage for it now if we haven't already.
2040 * Note that we allocate storage (uniform/constant slots) for state
2041 * variables here rather than at declaration time so we only allocate
2042 * space for the ones that we actually use!
2043 */
2044 if (root->File == PROGRAM_STATE_VAR) {
2045 GLboolean direct;
2046 GLint index = _slang_alloc_statevar(n, emitInfo->prog->Parameters, &direct);
2047 if (index < 0) {
2048 slang_info_log_error(emitInfo->log, "Error parsing state variable");
2049 return NULL;
2050 }
2051 if (direct) {
2052 root->Index = index;
2053 return NULL; /* all done */
2054 }
2055 }
2056
2057 /* do codegen for struct */
2058 emit(emitInfo, n->Children[0]);
2059 assert(n->Children[0]->Store->Index >= 0);
2060
2061
2062 fieldOffset = n->Store->Index;
2063 fieldSize = n->Store->Size;
2064
2065 _slang_copy_ir_storage(n->Store, n->Children[0]->Store);
2066
2067 n->Store->Index = n->Children[0]->Store->Index + fieldOffset / 4;
2068 n->Store->Size = fieldSize;
2069
2070 switch (fieldSize) {
2071 case 1:
2072 {
2073 GLint swz = fieldOffset % 4;
2074 n->Store->Swizzle = MAKE_SWIZZLE4(swz, swz, swz, swz);
2075 }
2076 break;
2077 case 2:
2078 n->Store->Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
2079 SWIZZLE_NIL, SWIZZLE_NIL);
2080 break;
2081 case 3:
2082 n->Store->Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
2083 SWIZZLE_Z, SWIZZLE_NIL);
2084 break;
2085 default:
2086 n->Store->Swizzle = SWIZZLE_XYZW;
2087 }
2088
2089 assert(n->Store->Index >= 0);
2090
2091 return NULL; /* no instruction */
2092 }
2093
2094
2095 /**
2096 * Emit code for a variable declaration.
2097 * This usually doesn't result in any code generation, but just
2098 * memory allocation.
2099 */
2100 static struct prog_instruction *
2101 emit_var_decl(slang_emit_info *emitInfo, slang_ir_node *n)
2102 {
2103 assert(n->Store);
2104 assert(n->Store->File != PROGRAM_UNDEFINED);
2105 assert(n->Store->Size > 0);
2106 /*assert(n->Store->Index < 0);*/
2107
2108 if (!n->Var || n->Var->isTemp) {
2109 /* a nameless/temporary variable, will be freed after first use */
2110 /*NEW*/
2111 if (n->Store->Index < 0 && !_slang_alloc_temp(emitInfo->vt, n->Store)) {
2112 slang_info_log_error(emitInfo->log,
2113 "Ran out of registers, too many temporaries");
2114 return NULL;
2115 }
2116 }
2117 else {
2118 /* a regular variable */
2119 _slang_add_variable(emitInfo->vt, n->Var);
2120 if (!_slang_alloc_var(emitInfo->vt, n->Store)) {
2121 slang_info_log_error(emitInfo->log,
2122 "Ran out of registers, too many variables");
2123 return NULL;
2124 }
2125 /*
2126 printf("IR_VAR_DECL %s %d store %p\n",
2127 (char*) n->Var->a_name, n->Store->Index, (void*) n->Store);
2128 */
2129 assert(n->Var->store == n->Store);
2130 }
2131 if (emitInfo->EmitComments) {
2132 /* emit NOP with comment describing the variable's storage location */
2133 char s[1000];
2134 sprintf(s, "TEMP[%d]%s = variable %s (size %d)",
2135 n->Store->Index,
2136 _mesa_swizzle_string(n->Store->Swizzle, 0, GL_FALSE),
2137 (n->Var ? (char *) n->Var->a_name : "anonymous"),
2138 n->Store->Size);
2139 emit_comment(emitInfo, s);
2140 }
2141 return NULL;
2142 }
2143
2144
2145 /**
2146 * Emit code for a reference to a variable.
2147 * Actually, no code is generated but we may do some memory allocation.
2148 * In particular, state vars (uniforms) are allocated on an as-needed basis.
2149 */
2150 static struct prog_instruction *
2151 emit_var_ref(slang_emit_info *emitInfo, slang_ir_node *n)
2152 {
2153 assert(n->Store);
2154 assert(n->Store->File != PROGRAM_UNDEFINED);
2155
2156 if (n->Store->File == PROGRAM_STATE_VAR && n->Store->Index < 0) {
2157 GLboolean direct;
2158 GLint index = _slang_alloc_statevar(n, emitInfo->prog->Parameters, &direct);
2159 if (index < 0) {
2160 /* error */
2161 char s[100];
2162 /* XXX isn't this really an out of memory/resources error? */
2163 _mesa_snprintf(s, sizeof(s), "Undefined variable '%s'",
2164 (char *) n->Var->a_name);
2165 slang_info_log_error(emitInfo->log, s);
2166 return NULL;
2167 }
2168
2169 n->Store->Index = index;
2170 }
2171 else if (n->Store->File == PROGRAM_UNIFORM ||
2172 n->Store->File == PROGRAM_SAMPLER) {
2173 /* mark var as used */
2174 _mesa_use_uniform(emitInfo->prog->Parameters, (char *) n->Var->a_name);
2175 }
2176 else if (n->Store->File == PROGRAM_INPUT) {
2177 assert(n->Store->Index >= 0);
2178 emitInfo->prog->InputsRead |= (1 << n->Store->Index);
2179 }
2180
2181 if (n->Store->Index < 0) {
2182 /* probably ran out of registers */
2183 return NULL;
2184 }
2185 assert(n->Store->Size > 0);
2186
2187 return NULL;
2188 }
2189
2190
2191 static struct prog_instruction *
2192 emit(slang_emit_info *emitInfo, slang_ir_node *n)
2193 {
2194 struct prog_instruction *inst;
2195 if (!n)
2196 return NULL;
2197
2198 if (emitInfo->log->error_flag) {
2199 return NULL;
2200 }
2201
2202 if (n->Comment) {
2203 inst = new_instruction(emitInfo, OPCODE_NOP);
2204 inst->Comment = _mesa_strdup(n->Comment);
2205 inst = NULL;
2206 }
2207
2208 switch (n->Opcode) {
2209 case IR_SEQ:
2210 /* sequence of two sub-trees */
2211 assert(n->Children[0]);
2212 assert(n->Children[1]);
2213 emit(emitInfo, n->Children[0]);
2214 if (emitInfo->log->error_flag)
2215 return NULL;
2216 inst = emit(emitInfo, n->Children[1]);
2217 #if 0
2218 assert(!n->Store);
2219 #endif
2220 n->Store = n->Children[1]->Store;
2221 return inst;
2222
2223 case IR_SCOPE:
2224 /* new variable scope */
2225 _slang_push_var_table(emitInfo->vt);
2226 inst = emit(emitInfo, n->Children[0]);
2227 _slang_pop_var_table(emitInfo->vt);
2228 return inst;
2229
2230 case IR_VAR_DECL:
2231 /* Variable declaration - allocate a register for it */
2232 inst = emit_var_decl(emitInfo, n);
2233 return inst;
2234
2235 case IR_VAR:
2236 /* Reference to a variable
2237 * Storage should have already been resolved/allocated.
2238 */
2239 return emit_var_ref(emitInfo, n);
2240
2241 case IR_ELEMENT:
2242 return emit_array_element(emitInfo, n);
2243 case IR_FIELD:
2244 return emit_struct_field(emitInfo, n);
2245 case IR_SWIZZLE:
2246 return emit_swizzle(emitInfo, n);
2247
2248 /* Simple arithmetic */
2249 /* unary */
2250 case IR_MOVE:
2251 case IR_RSQ:
2252 case IR_RCP:
2253 case IR_FLOOR:
2254 case IR_FRAC:
2255 case IR_F_TO_I:
2256 case IR_I_TO_F:
2257 case IR_ABS:
2258 case IR_SIN:
2259 case IR_COS:
2260 case IR_DDX:
2261 case IR_DDY:
2262 case IR_EXP:
2263 case IR_EXP2:
2264 case IR_LOG2:
2265 case IR_NOISE1:
2266 case IR_NOISE2:
2267 case IR_NOISE3:
2268 case IR_NOISE4:
2269 case IR_NRM4:
2270 case IR_NRM3:
2271 /* binary */
2272 case IR_ADD:
2273 case IR_SUB:
2274 case IR_MUL:
2275 case IR_DOT4:
2276 case IR_DOT3:
2277 case IR_DOT2:
2278 case IR_CROSS:
2279 case IR_MIN:
2280 case IR_MAX:
2281 case IR_SEQUAL:
2282 case IR_SNEQUAL:
2283 case IR_SGE:
2284 case IR_SGT:
2285 case IR_SLE:
2286 case IR_SLT:
2287 case IR_POW:
2288 /* trinary operators */
2289 case IR_LRP:
2290 return emit_arith(emitInfo, n);
2291
2292 case IR_EQUAL:
2293 case IR_NOTEQUAL:
2294 return emit_compare(emitInfo, n);
2295
2296 case IR_CLAMP:
2297 return emit_clamp(emitInfo, n);
2298 case IR_TEX:
2299 case IR_TEXB:
2300 case IR_TEXP:
2301 case IR_TEX_SH:
2302 case IR_TEXB_SH:
2303 case IR_TEXP_SH:
2304 return emit_tex(emitInfo, n);
2305 case IR_NEG:
2306 return emit_negation(emitInfo, n);
2307 case IR_FLOAT:
2308 /* find storage location for this float constant */
2309 n->Store->Index = _mesa_add_unnamed_constant(emitInfo->prog->Parameters,
2310 n->Value,
2311 n->Store->Size,
2312 &n->Store->Swizzle);
2313 if (n->Store->Index < 0) {
2314 slang_info_log_error(emitInfo->log, "Ran out of space for constants");
2315 return NULL;
2316 }
2317 return NULL;
2318
2319 case IR_COPY:
2320 return emit_copy(emitInfo, n);
2321
2322 case IR_COND:
2323 return emit_cond(emitInfo, n);
2324
2325 case IR_NOT:
2326 return emit_not(emitInfo, n);
2327
2328 case IR_LABEL:
2329 return emit_label(emitInfo, n);
2330
2331 case IR_KILL:
2332 return emit_kill(emitInfo);
2333
2334 case IR_CALL:
2335 /* new variable scope for subroutines/function calls */
2336 _slang_push_var_table(emitInfo->vt);
2337 inst = emit_fcall(emitInfo, n);
2338 _slang_pop_var_table(emitInfo->vt);
2339 return inst;
2340
2341 case IR_IF:
2342 return emit_if(emitInfo, n);
2343
2344 case IR_LOOP:
2345 return emit_loop(emitInfo, n);
2346 case IR_BREAK_IF_TRUE:
2347 case IR_CONT_IF_TRUE:
2348 return emit_cont_break_if_true(emitInfo, n);
2349 case IR_BREAK:
2350 /* fall-through */
2351 case IR_CONT:
2352 return emit_cont_break(emitInfo, n);
2353
2354 case IR_BEGIN_SUB:
2355 return new_instruction(emitInfo, OPCODE_BGNSUB);
2356 case IR_END_SUB:
2357 return new_instruction(emitInfo, OPCODE_ENDSUB);
2358 case IR_RETURN:
2359 return emit_return(emitInfo, n);
2360
2361 case IR_NOP:
2362 return NULL;
2363
2364 default:
2365 _mesa_problem(NULL, "Unexpected IR opcode in emit()\n");
2366 }
2367 return NULL;
2368 }
2369
2370
2371 /**
2372 * After code generation, any subroutines will be in separate program
2373 * objects. This function appends all the subroutines onto the main
2374 * program and resolves the linking of all the branch/call instructions.
2375 * XXX this logic should really be part of the linking process...
2376 */
2377 static void
2378 _slang_resolve_subroutines(slang_emit_info *emitInfo)
2379 {
2380 GET_CURRENT_CONTEXT(ctx);
2381 struct gl_program *mainP = emitInfo->prog;
2382 GLuint *subroutineLoc, i, total;
2383
2384 subroutineLoc
2385 = (GLuint *) _mesa_malloc(emitInfo->NumSubroutines * sizeof(GLuint));
2386
2387 /* total number of instructions */
2388 total = mainP->NumInstructions;
2389 for (i = 0; i < emitInfo->NumSubroutines; i++) {
2390 subroutineLoc[i] = total;
2391 total += emitInfo->Subroutines[i]->NumInstructions;
2392 }
2393
2394 /* adjust BranchTargets within the functions */
2395 for (i = 0; i < emitInfo->NumSubroutines; i++) {
2396 struct gl_program *sub = emitInfo->Subroutines[i];
2397 GLuint j;
2398 for (j = 0; j < sub->NumInstructions; j++) {
2399 struct prog_instruction *inst = sub->Instructions + j;
2400 if (inst->Opcode != OPCODE_CAL && inst->BranchTarget >= 0) {
2401 inst->BranchTarget += subroutineLoc[i];
2402 }
2403 }
2404 }
2405
2406 /* append subroutines' instructions after main's instructions */
2407 mainP->Instructions = _mesa_realloc_instructions(mainP->Instructions,
2408 mainP->NumInstructions,
2409 total);
2410 mainP->NumInstructions = total;
2411 for (i = 0; i < emitInfo->NumSubroutines; i++) {
2412 struct gl_program *sub = emitInfo->Subroutines[i];
2413 _mesa_copy_instructions(mainP->Instructions + subroutineLoc[i],
2414 sub->Instructions,
2415 sub->NumInstructions);
2416 /* delete subroutine code */
2417 sub->Parameters = NULL; /* prevent double-free */
2418 _mesa_reference_program(ctx, &emitInfo->Subroutines[i], NULL);
2419 }
2420
2421 /* free subroutine list */
2422 if (emitInfo->Subroutines) {
2423 _mesa_free(emitInfo->Subroutines);
2424 emitInfo->Subroutines = NULL;
2425 }
2426 emitInfo->NumSubroutines = 0;
2427
2428 /* Examine CAL instructions.
2429 * At this point, the BranchTarget field of the CAL instruction is
2430 * the number/id of the subroutine to call (an index into the
2431 * emitInfo->Subroutines list).
2432 * Translate that into an actual instruction location now.
2433 */
2434 for (i = 0; i < mainP->NumInstructions; i++) {
2435 struct prog_instruction *inst = mainP->Instructions + i;
2436 if (inst->Opcode == OPCODE_CAL) {
2437 const GLuint f = inst->BranchTarget;
2438 inst->BranchTarget = subroutineLoc[f];
2439 }
2440 }
2441
2442 _mesa_free(subroutineLoc);
2443 }
2444
2445
2446
2447 /**
2448 * Convert the IR tree into GPU instructions.
2449 * \param n root of IR tree
2450 * \param vt variable table
2451 * \param prog program to put GPU instructions into
2452 * \param pragmas controls codegen options
2453 * \param withEnd if true, emit END opcode at end
2454 * \param log log for emitting errors/warnings/info
2455 */
2456 GLboolean
2457 _slang_emit_code(slang_ir_node *n, slang_var_table *vt,
2458 struct gl_program *prog,
2459 const struct gl_sl_pragmas *pragmas,
2460 GLboolean withEnd,
2461 slang_info_log *log)
2462 {
2463 GET_CURRENT_CONTEXT(ctx);
2464 GLboolean success;
2465 slang_emit_info emitInfo;
2466 GLuint maxUniforms;
2467
2468 emitInfo.log = log;
2469 emitInfo.vt = vt;
2470 emitInfo.prog = prog;
2471 emitInfo.Subroutines = NULL;
2472 emitInfo.NumSubroutines = 0;
2473 emitInfo.MaxInstructions = prog->NumInstructions;
2474
2475 emitInfo.EmitHighLevelInstructions = ctx->Shader.EmitHighLevelInstructions;
2476 emitInfo.EmitCondCodes = ctx->Shader.EmitCondCodes;
2477 emitInfo.EmitComments = ctx->Shader.EmitComments || pragmas->Debug;
2478 emitInfo.EmitBeginEndSub = GL_TRUE;
2479
2480 if (!emitInfo.EmitCondCodes) {
2481 emitInfo.EmitHighLevelInstructions = GL_TRUE;
2482 }
2483
2484 /* Check uniform/constant limits */
2485 if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
2486 maxUniforms = ctx->Const.FragmentProgram.MaxUniformComponents / 4;
2487 }
2488 else {
2489 assert(prog->Target == GL_VERTEX_PROGRAM_ARB);
2490 maxUniforms = ctx->Const.VertexProgram.MaxUniformComponents / 4;
2491 }
2492 if (prog->Parameters->NumParameters > maxUniforms) {
2493 slang_info_log_error(log, "Constant/uniform register limit exceeded "
2494 "(max=%u vec4)", maxUniforms);
2495
2496 return GL_FALSE;
2497 }
2498
2499 (void) emit(&emitInfo, n);
2500
2501 /* finish up by adding the END opcode to program */
2502 if (withEnd) {
2503 struct prog_instruction *inst;
2504 inst = new_instruction(&emitInfo, OPCODE_END);
2505 }
2506
2507 _slang_resolve_subroutines(&emitInfo);
2508
2509 success = GL_TRUE;
2510
2511 #if 0
2512 printf("*********** End emit code (%u inst):\n", prog->NumInstructions);
2513 _mesa_print_program(prog);
2514 _mesa_print_program_parameters(ctx,prog);
2515 #endif
2516
2517 return success;
2518 }