slang: Check return value from new_instruction().
[mesa.git] / src / mesa / shader / slang / slang_emit.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2005-2008 Brian Paul All Rights Reserved.
5 * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file slang_emit.c
27 * Emit program instructions (PI code) from IR trees.
28 * \author Brian Paul
29 */
30
31 /***
32 *** NOTES
33 ***
34 *** To emit GPU instructions, we basically just do an in-order traversal
35 *** of the IR tree.
36 ***/
37
38
39 #include "main/imports.h"
40 #include "main/context.h"
41 #include "main/macros.h"
42 #include "shader/program.h"
43 #include "shader/prog_instruction.h"
44 #include "shader/prog_parameter.h"
45 #include "shader/prog_print.h"
46 #include "slang_builtin.h"
47 #include "slang_emit.h"
48 #include "slang_mem.h"
49
50
51 #define PEEPHOLE_OPTIMIZATIONS 1
52 #define ANNOTATE 0
53
54
55 typedef struct
56 {
57 slang_info_log *log;
58 slang_var_table *vt;
59 struct gl_program *prog;
60 struct gl_program **Subroutines;
61 GLuint NumSubroutines;
62
63 GLuint MaxInstructions; /**< size of prog->Instructions[] buffer */
64
65 GLboolean UnresolvedFunctions;
66
67 /* code-gen options */
68 GLboolean EmitHighLevelInstructions;
69 GLboolean EmitCondCodes;
70 GLboolean EmitComments;
71 GLboolean EmitBeginEndSub; /* XXX TEMPORARY */
72 } slang_emit_info;
73
74
75
76 static struct gl_program *
77 new_subroutine(slang_emit_info *emitInfo, GLuint *id)
78 {
79 GET_CURRENT_CONTEXT(ctx);
80 const GLuint n = emitInfo->NumSubroutines;
81
82 emitInfo->Subroutines = (struct gl_program **)
83 _mesa_realloc(emitInfo->Subroutines,
84 n * sizeof(struct gl_program),
85 (n + 1) * sizeof(struct gl_program));
86 emitInfo->Subroutines[n] = ctx->Driver.NewProgram(ctx, emitInfo->prog->Target, 0);
87 emitInfo->Subroutines[n]->Parameters = emitInfo->prog->Parameters;
88 emitInfo->NumSubroutines++;
89 *id = n;
90 return emitInfo->Subroutines[n];
91 }
92
93
94 /**
95 * Convert a writemask to a swizzle. Used for testing cond codes because
96 * we only want to test the cond code component(s) that was set by the
97 * previous instruction.
98 */
99 static GLuint
100 writemask_to_swizzle(GLuint writemask)
101 {
102 if (writemask == WRITEMASK_X)
103 return SWIZZLE_XXXX;
104 if (writemask == WRITEMASK_Y)
105 return SWIZZLE_YYYY;
106 if (writemask == WRITEMASK_Z)
107 return SWIZZLE_ZZZZ;
108 if (writemask == WRITEMASK_W)
109 return SWIZZLE_WWWW;
110 return SWIZZLE_XYZW; /* shouldn't be hit */
111 }
112
113
114 /**
115 * Convert a swizzle mask to a writemask.
116 * Note that the slang_ir_storage->Swizzle field can represent either a
117 * swizzle mask or a writemask, depending on how it's used. For example,
118 * when we parse "direction.yz" alone, we don't know whether .yz is a
119 * writemask or a swizzle. In this case, we encode ".yz" in store->Swizzle
120 * as a swizzle mask (.yz?? actually). Later, if direction.yz is used as
121 * an R-value, we use store->Swizzle as-is. Otherwise, if direction.yz is
122 * used as an L-value, we convert it to a writemask.
123 */
124 static GLuint
125 swizzle_to_writemask(GLuint swizzle)
126 {
127 GLuint i, writemask = 0x0;
128 for (i = 0; i < 4; i++) {
129 GLuint swz = GET_SWZ(swizzle, i);
130 if (swz <= SWIZZLE_W) {
131 writemask |= (1 << swz);
132 }
133 }
134 return writemask;
135 }
136
137
138 /**
139 * Swizzle a swizzle (function composition).
140 * That is, return swz2(swz1), or said another way: swz1.szw2
141 * Example: swizzle_swizzle(".zwxx", ".xxyw") yields ".zzwx"
142 */
143 GLuint
144 _slang_swizzle_swizzle(GLuint swz1, GLuint swz2)
145 {
146 GLuint i, swz, s[4];
147 for (i = 0; i < 4; i++) {
148 GLuint c = GET_SWZ(swz2, i);
149 if (c <= SWIZZLE_W)
150 s[i] = GET_SWZ(swz1, c);
151 else
152 s[i] = c;
153 }
154 swz = MAKE_SWIZZLE4(s[0], s[1], s[2], s[3]);
155 return swz;
156 }
157
158
159 /**
160 * Return the default swizzle mask for accessing a variable of the
161 * given size (in floats). If size = 1, comp is used to identify
162 * which component [0..3] of the register holds the variable.
163 */
164 GLuint
165 _slang_var_swizzle(GLint size, GLint comp)
166 {
167 switch (size) {
168 case 1:
169 return MAKE_SWIZZLE4(comp, SWIZZLE_NIL, SWIZZLE_NIL, SWIZZLE_NIL);
170 case 2:
171 return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_NIL, SWIZZLE_NIL);
172 case 3:
173 return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_NIL);
174 default:
175 return SWIZZLE_XYZW;
176 }
177 }
178
179
180
181 /**
182 * Allocate storage for the given node (if it hasn't already been allocated).
183 *
184 * Typically this is temporary storage for an intermediate result (such as
185 * for a multiply or add, etc).
186 *
187 * If n->Store does not exist it will be created and will be of the size
188 * specified by defaultSize.
189 */
190 static GLboolean
191 alloc_node_storage(slang_emit_info *emitInfo, slang_ir_node *n,
192 GLint defaultSize)
193 {
194 assert(!n->Var);
195 if (!n->Store) {
196 assert(defaultSize > 0);
197 n->Store = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, defaultSize);
198 }
199
200 /* now allocate actual register(s). I.e. set n->Store->Index >= 0 */
201 if (n->Store->Index < 0) {
202 if (!_slang_alloc_temp(emitInfo->vt, n->Store)) {
203 slang_info_log_error(emitInfo->log,
204 "Ran out of registers, too many temporaries");
205 _slang_free(n->Store);
206 n->Store = NULL;
207 return GL_FALSE;
208 }
209 }
210 return GL_TRUE;
211 }
212
213
214 /**
215 * Free temporary storage, if n->Store is, in fact, temp storage.
216 * Otherwise, no-op.
217 */
218 static void
219 free_node_storage(slang_var_table *vt, slang_ir_node *n)
220 {
221 if (n->Store->File == PROGRAM_TEMPORARY &&
222 n->Store->Index >= 0 &&
223 n->Opcode != IR_SWIZZLE) {
224 if (_slang_is_temp(vt, n->Store)) {
225 _slang_free_temp(vt, n->Store);
226 n->Store->Index = -1;
227 n->Store = NULL; /* XXX this may not be needed */
228 }
229 }
230 }
231
232
233 /**
234 * Helper function to allocate a short-term temporary.
235 * Free it with _slang_free_temp().
236 */
237 static GLboolean
238 alloc_local_temp(slang_emit_info *emitInfo, slang_ir_storage *temp, GLint size)
239 {
240 assert(size >= 1);
241 assert(size <= 4);
242 _mesa_bzero(temp, sizeof(*temp));
243 temp->Size = size;
244 temp->File = PROGRAM_TEMPORARY;
245 temp->Index = -1;
246 return _slang_alloc_temp(emitInfo->vt, temp);
247 }
248
249
250 /**
251 * Remove any SWIZZLE_NIL terms from given swizzle mask.
252 * For a swizzle like .z??? generate .zzzz (replicate single component).
253 * Else, for .wx?? generate .wxzw (insert default component for the position).
254 */
255 static GLuint
256 fix_swizzle(GLuint swizzle)
257 {
258 GLuint c0 = GET_SWZ(swizzle, 0),
259 c1 = GET_SWZ(swizzle, 1),
260 c2 = GET_SWZ(swizzle, 2),
261 c3 = GET_SWZ(swizzle, 3);
262 if (c1 == SWIZZLE_NIL && c2 == SWIZZLE_NIL && c3 == SWIZZLE_NIL) {
263 /* smear first component across all positions */
264 c1 = c2 = c3 = c0;
265 }
266 else {
267 /* insert default swizzle components */
268 if (c0 == SWIZZLE_NIL)
269 c0 = SWIZZLE_X;
270 if (c1 == SWIZZLE_NIL)
271 c1 = SWIZZLE_Y;
272 if (c2 == SWIZZLE_NIL)
273 c2 = SWIZZLE_Z;
274 if (c3 == SWIZZLE_NIL)
275 c3 = SWIZZLE_W;
276 }
277 return MAKE_SWIZZLE4(c0, c1, c2, c3);
278 }
279
280
281
282 /**
283 * Convert IR storage to an instruction dst register.
284 */
285 static void
286 storage_to_dst_reg(struct prog_dst_register *dst, const slang_ir_storage *st)
287 {
288 const GLboolean relAddr = st->RelAddr;
289 const GLint size = st->Size;
290 GLint index = st->Index;
291 GLuint swizzle = st->Swizzle;
292
293 assert(index >= 0);
294 /* if this is storage relative to some parent storage, walk up the tree */
295 while (st->Parent) {
296 st = st->Parent;
297 assert(st->Index >= 0);
298 index += st->Index;
299 swizzle = _slang_swizzle_swizzle(st->Swizzle, swizzle);
300 }
301
302 assert(st->File != PROGRAM_UNDEFINED);
303 dst->File = st->File;
304
305 assert(index >= 0);
306 dst->Index = index;
307
308 assert(size >= 1);
309 assert(size <= 4);
310
311 if (swizzle != SWIZZLE_XYZW) {
312 dst->WriteMask = swizzle_to_writemask(swizzle);
313 }
314 else {
315 switch (size) {
316 case 1:
317 dst->WriteMask = WRITEMASK_X << GET_SWZ(st->Swizzle, 0);
318 break;
319 case 2:
320 dst->WriteMask = WRITEMASK_XY;
321 break;
322 case 3:
323 dst->WriteMask = WRITEMASK_XYZ;
324 break;
325 case 4:
326 dst->WriteMask = WRITEMASK_XYZW;
327 break;
328 default:
329 ; /* error would have been caught above */
330 }
331 }
332
333 dst->RelAddr = relAddr;
334 }
335
336
337 /**
338 * Convert IR storage to an instruction src register.
339 */
340 static void
341 storage_to_src_reg(struct prog_src_register *src, const slang_ir_storage *st)
342 {
343 const GLboolean relAddr = st->RelAddr;
344 GLint index = st->Index;
345 GLuint swizzle = st->Swizzle;
346
347 /* if this is storage relative to some parent storage, walk up the tree */
348 assert(index >= 0);
349 while (st->Parent) {
350 st = st->Parent;
351 if (st->Index < 0) {
352 /* an error should have been reported already */
353 return;
354 }
355 assert(st->Index >= 0);
356 index += st->Index;
357 swizzle = _slang_swizzle_swizzle(fix_swizzle(st->Swizzle), swizzle);
358 }
359
360 assert(st->File >= 0);
361 #if 1 /* XXX temporary */
362 if (st->File == PROGRAM_UNDEFINED) {
363 slang_ir_storage *st0 = (slang_ir_storage *) st;
364 st0->File = PROGRAM_TEMPORARY;
365 }
366 #endif
367 assert(st->File < PROGRAM_UNDEFINED);
368 src->File = st->File;
369
370 assert(index >= 0);
371 src->Index = index;
372
373 swizzle = fix_swizzle(swizzle);
374 assert(GET_SWZ(swizzle, 0) <= SWIZZLE_W);
375 assert(GET_SWZ(swizzle, 1) <= SWIZZLE_W);
376 assert(GET_SWZ(swizzle, 2) <= SWIZZLE_W);
377 assert(GET_SWZ(swizzle, 3) <= SWIZZLE_W);
378 src->Swizzle = swizzle;
379
380 src->RelAddr = relAddr;
381 }
382
383
384 /*
385 * Setup storage pointing to a scalar constant/literal.
386 */
387 static void
388 constant_to_storage(slang_emit_info *emitInfo,
389 GLfloat val,
390 slang_ir_storage *store)
391 {
392 GLuint swizzle;
393 GLint reg;
394 GLfloat value[4];
395
396 value[0] = val;
397 reg = _mesa_add_unnamed_constant(emitInfo->prog->Parameters,
398 value, 1, &swizzle);
399
400 memset(store, 0, sizeof(*store));
401 store->File = PROGRAM_CONSTANT;
402 store->Index = reg;
403 store->Swizzle = swizzle;
404 }
405
406
407 /**
408 * Add new instruction at end of given program.
409 * \param prog the program to append instruction onto
410 * \param opcode opcode for the new instruction
411 * \return pointer to the new instruction
412 */
413 static struct prog_instruction *
414 new_instruction(slang_emit_info *emitInfo, gl_inst_opcode opcode)
415 {
416 struct gl_program *prog = emitInfo->prog;
417 struct prog_instruction *inst;
418
419 #if 0
420 /* print prev inst */
421 if (prog->NumInstructions > 0) {
422 _mesa_print_instruction(prog->Instructions + prog->NumInstructions - 1);
423 }
424 #endif
425 assert(prog->NumInstructions <= emitInfo->MaxInstructions);
426
427 if (prog->NumInstructions == emitInfo->MaxInstructions) {
428 /* grow the instruction buffer */
429 emitInfo->MaxInstructions += 20;
430 prog->Instructions =
431 _mesa_realloc_instructions(prog->Instructions,
432 prog->NumInstructions,
433 emitInfo->MaxInstructions);
434 if (!prog->Instructions) {
435 return NULL;
436 }
437 }
438
439 inst = prog->Instructions + prog->NumInstructions;
440 prog->NumInstructions++;
441 _mesa_init_instructions(inst, 1);
442 inst->Opcode = opcode;
443 inst->BranchTarget = -1; /* invalid */
444 /*
445 printf("New inst %d: %p %s\n", prog->NumInstructions-1,(void*)inst,
446 _mesa_opcode_string(inst->Opcode));
447 */
448 return inst;
449 }
450
451
452 static struct prog_instruction *
453 emit_arl_load(slang_emit_info *emitInfo,
454 gl_register_file file, GLint index, GLuint swizzle)
455 {
456 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_ARL);
457 if (inst) {
458 inst->SrcReg[0].File = file;
459 inst->SrcReg[0].Index = index;
460 inst->SrcReg[0].Swizzle = fix_swizzle(swizzle);
461 inst->DstReg.File = PROGRAM_ADDRESS;
462 inst->DstReg.Index = 0;
463 inst->DstReg.WriteMask = WRITEMASK_X;
464 }
465 return inst;
466 }
467
468
469 /**
470 * Emit a new instruction with given opcode, operands.
471 * At this point the instruction may have multiple indirect register
472 * loads/stores. We convert those into ARL loads and address-relative
473 * operands. See comments inside.
474 * At some point in the future we could directly emit indirectly addressed
475 * registers in Mesa GPU instructions.
476 */
477 static struct prog_instruction *
478 emit_instruction(slang_emit_info *emitInfo,
479 gl_inst_opcode opcode,
480 const slang_ir_storage *dst,
481 const slang_ir_storage *src0,
482 const slang_ir_storage *src1,
483 const slang_ir_storage *src2)
484 {
485 struct prog_instruction *inst;
486 GLuint numIndirect = 0;
487 const slang_ir_storage *src[3];
488 slang_ir_storage newSrc[3], newDst;
489 GLuint i;
490 GLboolean isTemp[3];
491
492 isTemp[0] = isTemp[1] = isTemp[2] = GL_FALSE;
493
494 src[0] = src0;
495 src[1] = src1;
496 src[2] = src2;
497
498 /* count up how many operands are indirect loads */
499 for (i = 0; i < 3; i++) {
500 if (src[i] && src[i]->IsIndirect)
501 numIndirect++;
502 }
503 if (dst && dst->IsIndirect)
504 numIndirect++;
505
506 /* Take special steps for indirect register loads.
507 * If we had multiple address registers this would be simpler.
508 * For example, this GLSL code:
509 * x[i] = y[j] + z[k];
510 * would translate into something like:
511 * ARL ADDR.x, i;
512 * ARL ADDR.y, j;
513 * ARL ADDR.z, k;
514 * ADD TEMP[ADDR.x+5], TEMP[ADDR.y+9], TEMP[ADDR.z+4];
515 * But since we currently only have one address register we have to do this:
516 * ARL ADDR.x, i;
517 * MOV t1, TEMP[ADDR.x+9];
518 * ARL ADDR.x, j;
519 * MOV t2, TEMP[ADDR.x+4];
520 * ARL ADDR.x, k;
521 * ADD TEMP[ADDR.x+5], t1, t2;
522 * The code here figures this out...
523 */
524 if (numIndirect > 0) {
525 for (i = 0; i < 3; i++) {
526 if (src[i] && src[i]->IsIndirect) {
527 /* load the ARL register with the indirect register */
528 emit_arl_load(emitInfo,
529 src[i]->IndirectFile,
530 src[i]->IndirectIndex,
531 src[i]->IndirectSwizzle);
532
533 if (numIndirect > 1) {
534 /* Need to load src[i] into a temporary register */
535 slang_ir_storage srcRelAddr;
536 alloc_local_temp(emitInfo, &newSrc[i], src[i]->Size);
537 isTemp[i] = GL_TRUE;
538
539 /* set RelAddr flag on src register */
540 srcRelAddr = *src[i];
541 srcRelAddr.RelAddr = GL_TRUE;
542 srcRelAddr.IsIndirect = GL_FALSE; /* not really needed */
543
544 /* MOV newSrc, srcRelAddr; */
545 inst = emit_instruction(emitInfo,
546 OPCODE_MOV,
547 &newSrc[i],
548 &srcRelAddr,
549 NULL,
550 NULL);
551
552 src[i] = &newSrc[i];
553 }
554 else {
555 /* just rewrite the src[i] storage to be ARL-relative */
556 newSrc[i] = *src[i];
557 newSrc[i].RelAddr = GL_TRUE;
558 newSrc[i].IsIndirect = GL_FALSE; /* not really needed */
559 src[i] = &newSrc[i];
560 }
561 }
562 }
563 }
564
565 /* Take special steps for indirect dest register write */
566 if (dst && dst->IsIndirect) {
567 /* load the ARL register with the indirect register */
568 emit_arl_load(emitInfo,
569 dst->IndirectFile,
570 dst->IndirectIndex,
571 dst->IndirectSwizzle);
572 newDst = *dst;
573 newDst.RelAddr = GL_TRUE;
574 newDst.IsIndirect = GL_FALSE;
575 dst = &newDst;
576 }
577
578 /* OK, emit the instruction and its dst, src regs */
579 inst = new_instruction(emitInfo, opcode);
580 if (!inst)
581 return NULL;
582
583 if (dst)
584 storage_to_dst_reg(&inst->DstReg, dst);
585
586 for (i = 0; i < 3; i++) {
587 if (src[i])
588 storage_to_src_reg(&inst->SrcReg[i], src[i]);
589 }
590
591 /* Free any temp registers that we allocated above */
592 for (i = 0; i < 3; i++) {
593 if (isTemp[i])
594 _slang_free_temp(emitInfo->vt, &newSrc[i]);
595 }
596
597 return inst;
598 }
599
600
601
602 /**
603 * Put a comment on the given instruction.
604 */
605 static void
606 inst_comment(struct prog_instruction *inst, const char *comment)
607 {
608 if (inst)
609 inst->Comment = _mesa_strdup(comment);
610 }
611
612
613
614 /**
615 * Return pointer to last instruction in program.
616 */
617 static struct prog_instruction *
618 prev_instruction(slang_emit_info *emitInfo)
619 {
620 struct gl_program *prog = emitInfo->prog;
621 if (prog->NumInstructions == 0)
622 return NULL;
623 else
624 return prog->Instructions + prog->NumInstructions - 1;
625 }
626
627
628 static struct prog_instruction *
629 emit(slang_emit_info *emitInfo, slang_ir_node *n);
630
631
632 /**
633 * Return an annotation string for given node's storage.
634 */
635 static char *
636 storage_annotation(const slang_ir_node *n, const struct gl_program *prog)
637 {
638 #if ANNOTATE
639 const slang_ir_storage *st = n->Store;
640 static char s[100] = "";
641
642 if (!st)
643 return _mesa_strdup("");
644
645 switch (st->File) {
646 case PROGRAM_CONSTANT:
647 if (st->Index >= 0) {
648 const GLfloat *val = prog->Parameters->ParameterValues[st->Index];
649 if (st->Swizzle == SWIZZLE_NOOP)
650 sprintf(s, "{%g, %g, %g, %g}", val[0], val[1], val[2], val[3]);
651 else {
652 sprintf(s, "%g", val[GET_SWZ(st->Swizzle, 0)]);
653 }
654 }
655 break;
656 case PROGRAM_TEMPORARY:
657 if (n->Var)
658 sprintf(s, "%s", (char *) n->Var->a_name);
659 else
660 sprintf(s, "t[%d]", st->Index);
661 break;
662 case PROGRAM_STATE_VAR:
663 case PROGRAM_UNIFORM:
664 sprintf(s, "%s", prog->Parameters->Parameters[st->Index].Name);
665 break;
666 case PROGRAM_VARYING:
667 sprintf(s, "%s", prog->Varying->Parameters[st->Index].Name);
668 break;
669 case PROGRAM_INPUT:
670 sprintf(s, "input[%d]", st->Index);
671 break;
672 case PROGRAM_OUTPUT:
673 sprintf(s, "output[%d]", st->Index);
674 break;
675 default:
676 s[0] = 0;
677 }
678 return _mesa_strdup(s);
679 #else
680 return NULL;
681 #endif
682 }
683
684
685 /**
686 * Return an annotation string for an instruction.
687 */
688 static char *
689 instruction_annotation(gl_inst_opcode opcode, char *dstAnnot,
690 char *srcAnnot0, char *srcAnnot1, char *srcAnnot2)
691 {
692 #if ANNOTATE
693 const char *operator;
694 char *s;
695 int len = 50;
696
697 if (dstAnnot)
698 len += strlen(dstAnnot);
699 else
700 dstAnnot = _mesa_strdup("");
701
702 if (srcAnnot0)
703 len += strlen(srcAnnot0);
704 else
705 srcAnnot0 = _mesa_strdup("");
706
707 if (srcAnnot1)
708 len += strlen(srcAnnot1);
709 else
710 srcAnnot1 = _mesa_strdup("");
711
712 if (srcAnnot2)
713 len += strlen(srcAnnot2);
714 else
715 srcAnnot2 = _mesa_strdup("");
716
717 switch (opcode) {
718 case OPCODE_ADD:
719 operator = "+";
720 break;
721 case OPCODE_SUB:
722 operator = "-";
723 break;
724 case OPCODE_MUL:
725 operator = "*";
726 break;
727 case OPCODE_DP2:
728 operator = "DP2";
729 break;
730 case OPCODE_DP3:
731 operator = "DP3";
732 break;
733 case OPCODE_DP4:
734 operator = "DP4";
735 break;
736 case OPCODE_XPD:
737 operator = "XPD";
738 break;
739 case OPCODE_RSQ:
740 operator = "RSQ";
741 break;
742 case OPCODE_SGT:
743 operator = ">";
744 break;
745 default:
746 operator = ",";
747 }
748
749 s = (char *) malloc(len);
750 sprintf(s, "%s = %s %s %s %s", dstAnnot,
751 srcAnnot0, operator, srcAnnot1, srcAnnot2);
752 assert(_mesa_strlen(s) < len);
753
754 free(dstAnnot);
755 free(srcAnnot0);
756 free(srcAnnot1);
757 free(srcAnnot2);
758
759 return s;
760 #else
761 return NULL;
762 #endif
763 }
764
765
766 /**
767 * Emit an instruction that's just a comment.
768 */
769 static struct prog_instruction *
770 emit_comment(slang_emit_info *emitInfo, const char *comment)
771 {
772 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_NOP);
773 if (inst) {
774 inst_comment(inst, comment);
775 }
776 return inst;
777 }
778
779
780 /**
781 * Generate code for a simple arithmetic instruction.
782 * Either 1, 2 or 3 operands.
783 */
784 static struct prog_instruction *
785 emit_arith(slang_emit_info *emitInfo, slang_ir_node *n)
786 {
787 const slang_ir_info *info = _slang_ir_info(n->Opcode);
788 struct prog_instruction *inst;
789 GLuint i;
790
791 assert(info);
792 assert(info->InstOpcode != OPCODE_NOP);
793
794 #if PEEPHOLE_OPTIMIZATIONS
795 /* Look for MAD opportunity */
796 if (info->NumParams == 2 &&
797 n->Opcode == IR_ADD && n->Children[0]->Opcode == IR_MUL) {
798 /* found pattern IR_ADD(IR_MUL(A, B), C) */
799 emit(emitInfo, n->Children[0]->Children[0]); /* A */
800 emit(emitInfo, n->Children[0]->Children[1]); /* B */
801 emit(emitInfo, n->Children[1]); /* C */
802 alloc_node_storage(emitInfo, n, -1); /* dest */
803
804 inst = emit_instruction(emitInfo,
805 OPCODE_MAD,
806 n->Store,
807 n->Children[0]->Children[0]->Store,
808 n->Children[0]->Children[1]->Store,
809 n->Children[1]->Store);
810
811 free_node_storage(emitInfo->vt, n->Children[0]->Children[0]);
812 free_node_storage(emitInfo->vt, n->Children[0]->Children[1]);
813 free_node_storage(emitInfo->vt, n->Children[1]);
814 return inst;
815 }
816
817 if (info->NumParams == 2 &&
818 n->Opcode == IR_ADD && n->Children[1]->Opcode == IR_MUL) {
819 /* found pattern IR_ADD(A, IR_MUL(B, C)) */
820 emit(emitInfo, n->Children[0]); /* A */
821 emit(emitInfo, n->Children[1]->Children[0]); /* B */
822 emit(emitInfo, n->Children[1]->Children[1]); /* C */
823 alloc_node_storage(emitInfo, n, -1); /* dest */
824
825 inst = emit_instruction(emitInfo,
826 OPCODE_MAD,
827 n->Store,
828 n->Children[1]->Children[0]->Store,
829 n->Children[1]->Children[1]->Store,
830 n->Children[0]->Store);
831
832 free_node_storage(emitInfo->vt, n->Children[1]->Children[0]);
833 free_node_storage(emitInfo->vt, n->Children[1]->Children[1]);
834 free_node_storage(emitInfo->vt, n->Children[0]);
835 return inst;
836 }
837 #endif
838
839 /* gen code for children, may involve temp allocation */
840 for (i = 0; i < info->NumParams; i++) {
841 emit(emitInfo, n->Children[i]);
842 if (!n->Children[i] || !n->Children[i]->Store) {
843 /* error recovery */
844 return NULL;
845 }
846 }
847
848 /* result storage */
849 alloc_node_storage(emitInfo, n, -1);
850
851 inst = emit_instruction(emitInfo,
852 info->InstOpcode,
853 n->Store, /* dest */
854 (info->NumParams > 0 ? n->Children[0]->Store : NULL),
855 (info->NumParams > 1 ? n->Children[1]->Store : NULL),
856 (info->NumParams > 2 ? n->Children[2]->Store : NULL)
857 );
858
859 /* free temps */
860 for (i = 0; i < info->NumParams; i++)
861 free_node_storage(emitInfo->vt, n->Children[i]);
862
863 return inst;
864 }
865
866
867 /**
868 * Emit code for == and != operators. These could normally be handled
869 * by emit_arith() except we need to be able to handle structure comparisons.
870 */
871 static struct prog_instruction *
872 emit_compare(slang_emit_info *emitInfo, slang_ir_node *n)
873 {
874 struct prog_instruction *inst = NULL;
875 GLint size;
876
877 assert(n->Opcode == IR_EQUAL || n->Opcode == IR_NOTEQUAL);
878
879 /* gen code for children */
880 emit(emitInfo, n->Children[0]);
881 emit(emitInfo, n->Children[1]);
882
883 if (n->Children[0]->Store->Size != n->Children[1]->Store->Size) {
884 /* XXX this error should have been caught in slang_codegen.c */
885 slang_info_log_error(emitInfo->log, "invalid operands to == or !=");
886 n->Store = NULL;
887 return NULL;
888 }
889
890 /* final result is 1 bool */
891 if (!alloc_node_storage(emitInfo, n, 1))
892 return NULL;
893
894 size = n->Children[0]->Store->Size;
895
896 if (size == 1) {
897 gl_inst_opcode opcode = n->Opcode == IR_EQUAL ? OPCODE_SEQ : OPCODE_SNE;
898 inst = emit_instruction(emitInfo,
899 opcode,
900 n->Store, /* dest */
901 n->Children[0]->Store,
902 n->Children[1]->Store,
903 NULL);
904 }
905 else if (size <= 4) {
906 /* compare two vectors.
907 * Unfortunately, there's no instruction to compare vectors and
908 * return a scalar result. Do it with some compare and dot product
909 * instructions...
910 */
911 GLuint swizzle;
912 gl_inst_opcode dotOp;
913 slang_ir_storage tempStore;
914
915 if (!alloc_local_temp(emitInfo, &tempStore, 4)) {
916 n->Store = NULL;
917 return NULL;
918 /* out of temps */
919 }
920
921 if (size == 4) {
922 dotOp = OPCODE_DP4;
923 swizzle = SWIZZLE_XYZW;
924 }
925 else if (size == 3) {
926 dotOp = OPCODE_DP3;
927 swizzle = SWIZZLE_XYZW;
928 }
929 else {
930 assert(size == 2);
931 dotOp = OPCODE_DP3; /* XXX use OPCODE_DP2 eventually */
932 swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y);
933 }
934
935 /* Compute inequality (temp = (A != B)) */
936 inst = emit_instruction(emitInfo,
937 OPCODE_SNE,
938 &tempStore,
939 n->Children[0]->Store,
940 n->Children[1]->Store,
941 NULL);
942 inst_comment(inst, "Compare values");
943
944 /* Compute val = DOT(temp, temp) (reduction) */
945 inst = emit_instruction(emitInfo,
946 dotOp,
947 n->Store,
948 &tempStore,
949 &tempStore,
950 NULL);
951 inst->SrcReg[0].Swizzle = inst->SrcReg[1].Swizzle = swizzle; /*override*/
952 inst_comment(inst, "Reduce vec to bool");
953
954 _slang_free_temp(emitInfo->vt, &tempStore); /* free temp */
955
956 if (n->Opcode == IR_EQUAL) {
957 /* compute val = !val.x with SEQ val, val, 0; */
958 slang_ir_storage zero;
959 constant_to_storage(emitInfo, 0.0, &zero);
960 inst = emit_instruction(emitInfo,
961 OPCODE_SEQ,
962 n->Store, /* dest */
963 n->Store,
964 &zero,
965 NULL);
966 inst_comment(inst, "Invert true/false");
967 }
968 }
969 else {
970 /* size > 4, struct or array compare.
971 * XXX this won't work reliably for structs with padding!!
972 */
973 GLint i, num = (n->Children[0]->Store->Size + 3) / 4;
974 slang_ir_storage accTemp, sneTemp;
975
976 if (!alloc_local_temp(emitInfo, &accTemp, 4))
977 return NULL;
978
979 if (!alloc_local_temp(emitInfo, &sneTemp, 4))
980 return NULL;
981
982 for (i = 0; i < num; i++) {
983 slang_ir_storage srcStore0 = *n->Children[0]->Store;
984 slang_ir_storage srcStore1 = *n->Children[1]->Store;
985 srcStore0.Index += i;
986 srcStore1.Index += i;
987
988 if (i == 0) {
989 /* SNE accTemp, left[i], right[i] */
990 inst = emit_instruction(emitInfo, OPCODE_SNE,
991 &accTemp, /* dest */
992 &srcStore0,
993 &srcStore1,
994 NULL);
995 inst_comment(inst, "Begin struct/array comparison");
996 }
997 else {
998 /* SNE sneTemp, left[i], right[i] */
999 inst = emit_instruction(emitInfo, OPCODE_SNE,
1000 &sneTemp, /* dest */
1001 &srcStore0,
1002 &srcStore1,
1003 NULL);
1004 /* ADD accTemp, accTemp, sneTemp; # like logical-OR */
1005 inst = emit_instruction(emitInfo, OPCODE_ADD,
1006 &accTemp, /* dest */
1007 &accTemp,
1008 &sneTemp,
1009 NULL);
1010 }
1011 }
1012
1013 /* compute accTemp.x || accTemp.y || accTemp.z || accTemp.w with DOT4 */
1014 inst = emit_instruction(emitInfo, OPCODE_DP4,
1015 n->Store,
1016 &accTemp,
1017 &accTemp,
1018 NULL);
1019 inst_comment(inst, "End struct/array comparison");
1020
1021 if (n->Opcode == IR_EQUAL) {
1022 /* compute tmp.x = !tmp.x via tmp.x = (tmp.x == 0) */
1023 slang_ir_storage zero;
1024 constant_to_storage(emitInfo, 0.0, &zero);
1025 inst = emit_instruction(emitInfo, OPCODE_SEQ,
1026 n->Store, /* dest */
1027 n->Store,
1028 &zero,
1029 NULL);
1030 inst_comment(inst, "Invert true/false");
1031 }
1032
1033 _slang_free_temp(emitInfo->vt, &accTemp);
1034 _slang_free_temp(emitInfo->vt, &sneTemp);
1035 }
1036
1037 /* free temps */
1038 free_node_storage(emitInfo->vt, n->Children[0]);
1039 free_node_storage(emitInfo->vt, n->Children[1]);
1040
1041 return inst;
1042 }
1043
1044
1045
1046 /**
1047 * Generate code for an IR_CLAMP instruction.
1048 */
1049 static struct prog_instruction *
1050 emit_clamp(slang_emit_info *emitInfo, slang_ir_node *n)
1051 {
1052 struct prog_instruction *inst;
1053 slang_ir_node tmpNode;
1054
1055 assert(n->Opcode == IR_CLAMP);
1056 /* ch[0] = value
1057 * ch[1] = min limit
1058 * ch[2] = max limit
1059 */
1060
1061 inst = emit(emitInfo, n->Children[0]);
1062
1063 /* If lower limit == 0.0 and upper limit == 1.0,
1064 * set prev instruction's SaturateMode field to SATURATE_ZERO_ONE.
1065 * Else,
1066 * emit OPCODE_MIN, OPCODE_MAX sequence.
1067 */
1068 #if 0
1069 /* XXX this isn't quite finished yet */
1070 if (n->Children[1]->Opcode == IR_FLOAT &&
1071 n->Children[1]->Value[0] == 0.0 &&
1072 n->Children[1]->Value[1] == 0.0 &&
1073 n->Children[1]->Value[2] == 0.0 &&
1074 n->Children[1]->Value[3] == 0.0 &&
1075 n->Children[2]->Opcode == IR_FLOAT &&
1076 n->Children[2]->Value[0] == 1.0 &&
1077 n->Children[2]->Value[1] == 1.0 &&
1078 n->Children[2]->Value[2] == 1.0 &&
1079 n->Children[2]->Value[3] == 1.0) {
1080 if (!inst) {
1081 inst = prev_instruction(prog);
1082 }
1083 if (inst && inst->Opcode != OPCODE_NOP) {
1084 /* and prev instruction's DstReg matches n->Children[0]->Store */
1085 inst->SaturateMode = SATURATE_ZERO_ONE;
1086 n->Store = n->Children[0]->Store;
1087 return inst;
1088 }
1089 }
1090 #endif
1091
1092 if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size))
1093 return NULL;
1094
1095 emit(emitInfo, n->Children[1]);
1096 emit(emitInfo, n->Children[2]);
1097
1098 /* Some GPUs don't allow reading from output registers. So if the
1099 * dest for this clamp() is an output reg, we can't use that reg for
1100 * the intermediate result. Use a temp register instead.
1101 */
1102 _mesa_bzero(&tmpNode, sizeof(tmpNode));
1103 alloc_node_storage(emitInfo, &tmpNode, n->Store->Size);
1104
1105 /* tmp = max(ch[0], ch[1]) */
1106 inst = emit_instruction(emitInfo, OPCODE_MAX,
1107 tmpNode.Store, /* dest */
1108 n->Children[0]->Store,
1109 n->Children[1]->Store,
1110 NULL);
1111
1112 /* n->dest = min(tmp, ch[2]) */
1113 inst = emit_instruction(emitInfo, OPCODE_MIN,
1114 n->Store, /* dest */
1115 tmpNode.Store,
1116 n->Children[2]->Store,
1117 NULL);
1118
1119 free_node_storage(emitInfo->vt, &tmpNode);
1120
1121 return inst;
1122 }
1123
1124
1125 static struct prog_instruction *
1126 emit_negation(slang_emit_info *emitInfo, slang_ir_node *n)
1127 {
1128 /* Implement as MOV dst, -src; */
1129 /* XXX we could look at the previous instruction and in some circumstances
1130 * modify it to accomplish the negation.
1131 */
1132 struct prog_instruction *inst;
1133
1134 emit(emitInfo, n->Children[0]);
1135
1136 if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size))
1137 return NULL;
1138
1139 inst = emit_instruction(emitInfo,
1140 OPCODE_MOV,
1141 n->Store, /* dest */
1142 n->Children[0]->Store,
1143 NULL,
1144 NULL);
1145 inst->SrcReg[0].Negate = NEGATE_XYZW;
1146 return inst;
1147 }
1148
1149
1150 static struct prog_instruction *
1151 emit_label(slang_emit_info *emitInfo, const slang_ir_node *n)
1152 {
1153 assert(n->Label);
1154 #if 0
1155 /* XXX this fails in loop tail code - investigate someday */
1156 assert(_slang_label_get_location(n->Label) < 0);
1157 _slang_label_set_location(n->Label, emitInfo->prog->NumInstructions,
1158 emitInfo->prog);
1159 #else
1160 if (_slang_label_get_location(n->Label) < 0)
1161 _slang_label_set_location(n->Label, emitInfo->prog->NumInstructions,
1162 emitInfo->prog);
1163 #endif
1164 return NULL;
1165 }
1166
1167
1168 /**
1169 * Emit code for a function call.
1170 * Note that for each time a function is called, we emit the function's
1171 * body code again because the set of available registers may be different.
1172 */
1173 static struct prog_instruction *
1174 emit_fcall(slang_emit_info *emitInfo, slang_ir_node *n)
1175 {
1176 struct gl_program *progSave;
1177 struct prog_instruction *inst;
1178 GLuint subroutineId;
1179 GLuint maxInstSave;
1180
1181 assert(n->Opcode == IR_CALL);
1182 assert(n->Label);
1183
1184 /* save/push cur program */
1185 maxInstSave = emitInfo->MaxInstructions;
1186 progSave = emitInfo->prog;
1187
1188 emitInfo->prog = new_subroutine(emitInfo, &subroutineId);
1189 emitInfo->MaxInstructions = emitInfo->prog->NumInstructions;
1190
1191 _slang_label_set_location(n->Label, emitInfo->prog->NumInstructions,
1192 emitInfo->prog);
1193
1194 if (emitInfo->EmitBeginEndSub) {
1195 /* BGNSUB isn't a real instruction.
1196 * We require a label (i.e. "foobar:") though, if we're going to
1197 * print the program in the NV format. The BNGSUB instruction is
1198 * really just a NOP to attach the label to.
1199 */
1200 inst = new_instruction(emitInfo, OPCODE_BGNSUB);
1201 if (!inst) {
1202 return NULL;
1203 }
1204 inst_comment(inst, n->Label->Name);
1205 }
1206
1207 /* body of function: */
1208 emit(emitInfo, n->Children[0]);
1209 n->Store = n->Children[0]->Store;
1210
1211 /* add RET instruction now, if needed */
1212 inst = prev_instruction(emitInfo);
1213 if (inst && inst->Opcode != OPCODE_RET) {
1214 inst = new_instruction(emitInfo, OPCODE_RET);
1215 if (!inst) {
1216 return NULL;
1217 }
1218 }
1219
1220 if (emitInfo->EmitBeginEndSub) {
1221 inst = new_instruction(emitInfo, OPCODE_ENDSUB);
1222 if (!inst) {
1223 return NULL;
1224 }
1225 inst_comment(inst, n->Label->Name);
1226 }
1227
1228 /* pop/restore cur program */
1229 emitInfo->prog = progSave;
1230 emitInfo->MaxInstructions = maxInstSave;
1231
1232 /* emit the function call */
1233 inst = new_instruction(emitInfo, OPCODE_CAL);
1234 if (!inst) {
1235 return NULL;
1236 }
1237 /* The branch target is just the subroutine number (changed later) */
1238 inst->BranchTarget = subroutineId;
1239 inst_comment(inst, n->Label->Name);
1240 assert(inst->BranchTarget >= 0);
1241
1242 return inst;
1243 }
1244
1245
1246 /**
1247 * Emit code for a 'return' statement.
1248 */
1249 static struct prog_instruction *
1250 emit_return(slang_emit_info *emitInfo, slang_ir_node *n)
1251 {
1252 struct prog_instruction *inst;
1253 assert(n);
1254 assert(n->Opcode == IR_RETURN);
1255 assert(n->Label);
1256 inst = new_instruction(emitInfo, OPCODE_RET);
1257 if (inst) {
1258 inst->DstReg.CondMask = COND_TR; /* always return */
1259 }
1260 return inst;
1261 }
1262
1263
1264 static struct prog_instruction *
1265 emit_kill(slang_emit_info *emitInfo)
1266 {
1267 struct gl_fragment_program *fp;
1268 struct prog_instruction *inst;
1269 /* NV-KILL - discard fragment depending on condition code.
1270 * Note that ARB-KILL depends on sign of vector operand.
1271 */
1272 inst = new_instruction(emitInfo, OPCODE_KIL_NV);
1273 if (!inst) {
1274 return NULL;
1275 }
1276 inst->DstReg.CondMask = COND_TR; /* always kill */
1277
1278 assert(emitInfo->prog->Target == GL_FRAGMENT_PROGRAM_ARB);
1279 fp = (struct gl_fragment_program *) emitInfo->prog;
1280 fp->UsesKill = GL_TRUE;
1281
1282 return inst;
1283 }
1284
1285
1286 static struct prog_instruction *
1287 emit_tex(slang_emit_info *emitInfo, slang_ir_node *n)
1288 {
1289 struct prog_instruction *inst;
1290 gl_inst_opcode opcode;
1291 GLboolean shadow = GL_FALSE;
1292
1293 switch (n->Opcode) {
1294 case IR_TEX:
1295 opcode = OPCODE_TEX;
1296 break;
1297 case IR_TEX_SH:
1298 opcode = OPCODE_TEX;
1299 shadow = GL_TRUE;
1300 break;
1301 case IR_TEXB:
1302 opcode = OPCODE_TXB;
1303 break;
1304 case IR_TEXB_SH:
1305 opcode = OPCODE_TXB;
1306 shadow = GL_TRUE;
1307 break;
1308 case IR_TEXP:
1309 opcode = OPCODE_TXP;
1310 break;
1311 case IR_TEXP_SH:
1312 opcode = OPCODE_TXP;
1313 shadow = GL_TRUE;
1314 break;
1315 default:
1316 _mesa_problem(NULL, "Bad IR TEX code");
1317 return NULL;
1318 }
1319
1320 if (n->Children[0]->Opcode == IR_ELEMENT) {
1321 /* array is the sampler (a uniform which'll indicate the texture unit) */
1322 assert(n->Children[0]->Children[0]->Store);
1323 assert(n->Children[0]->Children[0]->Store->File == PROGRAM_SAMPLER);
1324
1325 emit(emitInfo, n->Children[0]);
1326
1327 n->Children[0]->Var = n->Children[0]->Children[0]->Var;
1328 } else {
1329 /* this is the sampler (a uniform which'll indicate the texture unit) */
1330 assert(n->Children[0]->Store);
1331 assert(n->Children[0]->Store->File == PROGRAM_SAMPLER);
1332 }
1333
1334 /* emit code for the texcoord operand */
1335 (void) emit(emitInfo, n->Children[1]);
1336
1337 /* alloc storage for result of texture fetch */
1338 if (!alloc_node_storage(emitInfo, n, 4))
1339 return NULL;
1340
1341 /* emit TEX instruction; Child[1] is the texcoord */
1342 inst = emit_instruction(emitInfo,
1343 opcode,
1344 n->Store,
1345 n->Children[1]->Store,
1346 NULL,
1347 NULL);
1348
1349 inst->TexShadow = shadow;
1350
1351 /* Store->Index is the uniform/sampler index */
1352 assert(n->Children[0]->Store->Index >= 0);
1353 inst->TexSrcUnit = n->Children[0]->Store->Index;
1354 inst->TexSrcTarget = n->Children[0]->Store->TexTarget;
1355
1356 /* mark the sampler as being used */
1357 _mesa_use_uniform(emitInfo->prog->Parameters,
1358 (char *) n->Children[0]->Var->a_name);
1359
1360 return inst;
1361 }
1362
1363
1364 /**
1365 * Assignment/copy
1366 */
1367 static struct prog_instruction *
1368 emit_copy(slang_emit_info *emitInfo, slang_ir_node *n)
1369 {
1370 struct prog_instruction *inst;
1371
1372 assert(n->Opcode == IR_COPY);
1373
1374 /* lhs */
1375 emit(emitInfo, n->Children[0]);
1376 if (!n->Children[0]->Store || n->Children[0]->Store->Index < 0) {
1377 /* an error should have been already recorded */
1378 return NULL;
1379 }
1380
1381 /* rhs */
1382 assert(n->Children[1]);
1383 inst = emit(emitInfo, n->Children[1]);
1384
1385 if (!n->Children[1]->Store || n->Children[1]->Store->Index < 0) {
1386 if (!emitInfo->log->text && !emitInfo->UnresolvedFunctions) {
1387 /* XXX this error should have been caught in slang_codegen.c */
1388 slang_info_log_error(emitInfo->log, "invalid assignment");
1389 }
1390 return NULL;
1391 }
1392
1393 assert(n->Children[1]->Store->Index >= 0);
1394
1395 /*assert(n->Children[0]->Store->Size == n->Children[1]->Store->Size);*/
1396
1397 n->Store = n->Children[0]->Store;
1398
1399 if (n->Store->File == PROGRAM_SAMPLER) {
1400 /* no code generated for sampler assignments,
1401 * just copy the sampler index/target at compile time.
1402 */
1403 n->Store->Index = n->Children[1]->Store->Index;
1404 n->Store->TexTarget = n->Children[1]->Store->TexTarget;
1405 return NULL;
1406 }
1407
1408 #if PEEPHOLE_OPTIMIZATIONS
1409 if (inst &&
1410 (n->Children[1]->Opcode != IR_SWIZZLE) &&
1411 _slang_is_temp(emitInfo->vt, n->Children[1]->Store) &&
1412 (inst->DstReg.File == n->Children[1]->Store->File) &&
1413 (inst->DstReg.Index == n->Children[1]->Store->Index) &&
1414 !n->Children[0]->Store->IsIndirect &&
1415 n->Children[0]->Store->Size <= 4) {
1416 /* Peephole optimization:
1417 * The Right-Hand-Side has its results in a temporary place.
1418 * Modify the RHS (and the prev instruction) to store its results
1419 * in the destination specified by n->Children[0].
1420 * Then, this MOVE is a no-op.
1421 * Ex:
1422 * MUL tmp, x, y;
1423 * MOV a, tmp;
1424 * becomes:
1425 * MUL a, x, y;
1426 */
1427
1428 /* fixup the previous instruction (which stored the RHS result) */
1429 assert(n->Children[0]->Store->Index >= 0);
1430 storage_to_dst_reg(&inst->DstReg, n->Children[0]->Store);
1431 return inst;
1432 }
1433 else
1434 #endif
1435 {
1436 if (n->Children[0]->Store->Size > 4) {
1437 /* move matrix/struct etc (block of registers) */
1438 slang_ir_storage dstStore = *n->Children[0]->Store;
1439 slang_ir_storage srcStore = *n->Children[1]->Store;
1440 GLint size = srcStore.Size;
1441 ASSERT(n->Children[1]->Store->Swizzle == SWIZZLE_NOOP);
1442 dstStore.Size = 4;
1443 srcStore.Size = 4;
1444 while (size >= 4) {
1445 inst = emit_instruction(emitInfo, OPCODE_MOV,
1446 &dstStore,
1447 &srcStore,
1448 NULL,
1449 NULL);
1450 inst_comment(inst, "IR_COPY block");
1451 srcStore.Index++;
1452 dstStore.Index++;
1453 size -= 4;
1454 }
1455 }
1456 else {
1457 /* single register move */
1458 char *srcAnnot, *dstAnnot;
1459 assert(n->Children[0]->Store->Index >= 0);
1460 inst = emit_instruction(emitInfo, OPCODE_MOV,
1461 n->Children[0]->Store, /* dest */
1462 n->Children[1]->Store,
1463 NULL,
1464 NULL);
1465 dstAnnot = storage_annotation(n->Children[0], emitInfo->prog);
1466 srcAnnot = storage_annotation(n->Children[1], emitInfo->prog);
1467 inst->Comment = instruction_annotation(inst->Opcode, dstAnnot,
1468 srcAnnot, NULL, NULL);
1469 }
1470 free_node_storage(emitInfo->vt, n->Children[1]);
1471 return inst;
1472 }
1473 }
1474
1475
1476 /**
1477 * An IR_COND node wraps a boolean expression which is used by an
1478 * IF or WHILE test. This is where we'll set condition codes, if needed.
1479 */
1480 static struct prog_instruction *
1481 emit_cond(slang_emit_info *emitInfo, slang_ir_node *n)
1482 {
1483 struct prog_instruction *inst;
1484
1485 assert(n->Opcode == IR_COND);
1486
1487 if (!n->Children[0])
1488 return NULL;
1489
1490 /* emit code for the expression */
1491 inst = emit(emitInfo, n->Children[0]);
1492
1493 if (!n->Children[0]->Store) {
1494 /* error recovery */
1495 return NULL;
1496 }
1497
1498 assert(n->Children[0]->Store);
1499 /*assert(n->Children[0]->Store->Size == 1);*/
1500
1501 if (emitInfo->EmitCondCodes) {
1502 if (inst &&
1503 n->Children[0]->Store &&
1504 inst->DstReg.File == n->Children[0]->Store->File &&
1505 inst->DstReg.Index == n->Children[0]->Store->Index) {
1506 /* The previous instruction wrote to the register who's value
1507 * we're testing. Just fix that instruction so that the
1508 * condition codes are computed.
1509 */
1510 inst->CondUpdate = GL_TRUE;
1511 n->Store = n->Children[0]->Store;
1512 return inst;
1513 }
1514 else {
1515 /* This'll happen for things like "if (i) ..." where no code
1516 * is normally generated for the expression "i".
1517 * Generate a move instruction just to set condition codes.
1518 */
1519 if (!alloc_node_storage(emitInfo, n, 1))
1520 return NULL;
1521 inst = emit_instruction(emitInfo, OPCODE_MOV,
1522 n->Store, /* dest */
1523 n->Children[0]->Store,
1524 NULL,
1525 NULL);
1526 inst->CondUpdate = GL_TRUE;
1527 inst_comment(inst, "COND expr");
1528 _slang_free_temp(emitInfo->vt, n->Store);
1529 return inst;
1530 }
1531 }
1532 else {
1533 /* No-op: the boolean result of the expression is in a regular reg */
1534 n->Store = n->Children[0]->Store;
1535 return inst;
1536 }
1537 }
1538
1539
1540 /**
1541 * Logical-NOT
1542 */
1543 static struct prog_instruction *
1544 emit_not(slang_emit_info *emitInfo, slang_ir_node *n)
1545 {
1546 static const struct {
1547 gl_inst_opcode op, opNot;
1548 } operators[] = {
1549 { OPCODE_SLT, OPCODE_SGE },
1550 { OPCODE_SLE, OPCODE_SGT },
1551 { OPCODE_SGT, OPCODE_SLE },
1552 { OPCODE_SGE, OPCODE_SLT },
1553 { OPCODE_SEQ, OPCODE_SNE },
1554 { OPCODE_SNE, OPCODE_SEQ },
1555 { 0, 0 }
1556 };
1557 struct prog_instruction *inst;
1558 slang_ir_storage zero;
1559 GLuint i;
1560
1561 /* child expr */
1562 inst = emit(emitInfo, n->Children[0]);
1563
1564 #if PEEPHOLE_OPTIMIZATIONS
1565 if (inst) {
1566 /* if the prev instruction was a comparison instruction, invert it */
1567 for (i = 0; operators[i].op; i++) {
1568 if (inst->Opcode == operators[i].op) {
1569 inst->Opcode = operators[i].opNot;
1570 n->Store = n->Children[0]->Store;
1571 return inst;
1572 }
1573 }
1574 }
1575 #endif
1576
1577 /* else, invert using SEQ (v = v == 0) */
1578 if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size))
1579 return NULL;
1580
1581 constant_to_storage(emitInfo, 0.0, &zero);
1582 inst = emit_instruction(emitInfo,
1583 OPCODE_SEQ,
1584 n->Store,
1585 n->Children[0]->Store,
1586 &zero,
1587 NULL);
1588 inst_comment(inst, "NOT");
1589
1590 free_node_storage(emitInfo->vt, n->Children[0]);
1591
1592 return inst;
1593 }
1594
1595
1596 static struct prog_instruction *
1597 emit_if(slang_emit_info *emitInfo, slang_ir_node *n)
1598 {
1599 struct gl_program *prog = emitInfo->prog;
1600 GLuint ifInstLoc, elseInstLoc = 0;
1601 GLuint condWritemask = 0;
1602
1603 /* emit condition expression code */
1604 {
1605 struct prog_instruction *inst;
1606 inst = emit(emitInfo, n->Children[0]);
1607 if (emitInfo->EmitCondCodes) {
1608 if (!inst) {
1609 /* error recovery */
1610 return NULL;
1611 }
1612 condWritemask = inst->DstReg.WriteMask;
1613 }
1614 }
1615
1616 if (!n->Children[0]->Store)
1617 return NULL;
1618
1619 #if 0
1620 assert(n->Children[0]->Store->Size == 1); /* a bool! */
1621 #endif
1622
1623 ifInstLoc = prog->NumInstructions;
1624 if (emitInfo->EmitHighLevelInstructions) {
1625 if (emitInfo->EmitCondCodes) {
1626 /* IF condcode THEN ... */
1627 struct prog_instruction *ifInst = new_instruction(emitInfo, OPCODE_IF);
1628 if (!ifInst) {
1629 return NULL;
1630 }
1631 ifInst->DstReg.CondMask = COND_NE; /* if cond is non-zero */
1632 /* only test the cond code (1 of 4) that was updated by the
1633 * previous instruction.
1634 */
1635 ifInst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1636 }
1637 else {
1638 /* IF src[0] THEN ... */
1639 emit_instruction(emitInfo, OPCODE_IF,
1640 NULL, /* dst */
1641 n->Children[0]->Store, /* op0 */
1642 NULL,
1643 NULL);
1644 }
1645 }
1646 else {
1647 /* conditional jump to else, or endif */
1648 struct prog_instruction *ifInst = new_instruction(emitInfo, OPCODE_BRA);
1649 if (!ifInst) {
1650 return NULL;
1651 }
1652 ifInst->DstReg.CondMask = COND_EQ; /* BRA if cond is zero */
1653 inst_comment(ifInst, "if zero");
1654 ifInst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1655 }
1656
1657 /* if body */
1658 emit(emitInfo, n->Children[1]);
1659
1660 if (n->Children[2]) {
1661 /* have else body */
1662 elseInstLoc = prog->NumInstructions;
1663 if (emitInfo->EmitHighLevelInstructions) {
1664 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_ELSE);
1665 if (!inst) {
1666 return NULL;
1667 }
1668 }
1669 else {
1670 /* jump to endif instruction */
1671 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_BRA);
1672 if (!inst) {
1673 return NULL;
1674 }
1675 inst_comment(inst, "else");
1676 inst->DstReg.CondMask = COND_TR; /* always branch */
1677 }
1678 prog->Instructions[ifInstLoc].BranchTarget = prog->NumInstructions;
1679 emit(emitInfo, n->Children[2]);
1680 }
1681 else {
1682 /* no else body */
1683 prog->Instructions[ifInstLoc].BranchTarget = prog->NumInstructions;
1684 }
1685
1686 if (emitInfo->EmitHighLevelInstructions) {
1687 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_ENDIF);
1688 if (!inst) {
1689 return NULL;
1690 }
1691 }
1692
1693 if (n->Children[2]) {
1694 prog->Instructions[elseInstLoc].BranchTarget = prog->NumInstructions;
1695 }
1696 return NULL;
1697 }
1698
1699
1700 static struct prog_instruction *
1701 emit_loop(slang_emit_info *emitInfo, slang_ir_node *n)
1702 {
1703 struct gl_program *prog = emitInfo->prog;
1704 struct prog_instruction *endInst;
1705 GLuint beginInstLoc, tailInstLoc, endInstLoc;
1706 slang_ir_node *ir;
1707
1708 /* emit OPCODE_BGNLOOP */
1709 beginInstLoc = prog->NumInstructions;
1710 if (emitInfo->EmitHighLevelInstructions) {
1711 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_BGNLOOP);
1712 if (!inst) {
1713 return NULL;
1714 }
1715 }
1716
1717 /* body */
1718 emit(emitInfo, n->Children[0]);
1719
1720 /* tail */
1721 tailInstLoc = prog->NumInstructions;
1722 if (n->Children[1]) {
1723 if (emitInfo->EmitComments)
1724 emit_comment(emitInfo, "Loop tail code:");
1725 emit(emitInfo, n->Children[1]);
1726 }
1727
1728 endInstLoc = prog->NumInstructions;
1729 if (emitInfo->EmitHighLevelInstructions) {
1730 /* emit OPCODE_ENDLOOP */
1731 endInst = new_instruction(emitInfo, OPCODE_ENDLOOP);
1732 if (!endInst) {
1733 return NULL;
1734 }
1735 }
1736 else {
1737 /* emit unconditional BRA-nch */
1738 endInst = new_instruction(emitInfo, OPCODE_BRA);
1739 if (!endInst) {
1740 return NULL;
1741 }
1742 endInst->DstReg.CondMask = COND_TR; /* always true */
1743 }
1744 /* ENDLOOP's BranchTarget points to the BGNLOOP inst */
1745 endInst->BranchTarget = beginInstLoc;
1746
1747 if (emitInfo->EmitHighLevelInstructions) {
1748 /* BGNLOOP's BranchTarget points to the ENDLOOP inst */
1749 prog->Instructions[beginInstLoc].BranchTarget = prog->NumInstructions -1;
1750 }
1751
1752 /* Done emitting loop code. Now walk over the loop's linked list of
1753 * BREAK and CONT nodes, filling in their BranchTarget fields (which
1754 * will point to the ENDLOOP+1 or BGNLOOP instructions, respectively).
1755 */
1756 for (ir = n->List; ir; ir = ir->List) {
1757 struct prog_instruction *inst = prog->Instructions + ir->InstLocation;
1758 assert(inst->BranchTarget < 0);
1759 if (ir->Opcode == IR_BREAK ||
1760 ir->Opcode == IR_BREAK_IF_TRUE) {
1761 assert(inst->Opcode == OPCODE_BRK ||
1762 inst->Opcode == OPCODE_BRA);
1763 /* go to instruction after end of loop */
1764 inst->BranchTarget = endInstLoc + 1;
1765 }
1766 else {
1767 assert(ir->Opcode == IR_CONT ||
1768 ir->Opcode == IR_CONT_IF_TRUE);
1769 assert(inst->Opcode == OPCODE_CONT ||
1770 inst->Opcode == OPCODE_BRA);
1771 /* go to instruction at tail of loop */
1772 inst->BranchTarget = endInstLoc;
1773 }
1774 }
1775 return NULL;
1776 }
1777
1778
1779 /**
1780 * Unconditional "continue" or "break" statement.
1781 * Either OPCODE_CONT, OPCODE_BRK or OPCODE_BRA will be emitted.
1782 */
1783 static struct prog_instruction *
1784 emit_cont_break(slang_emit_info *emitInfo, slang_ir_node *n)
1785 {
1786 gl_inst_opcode opcode;
1787 struct prog_instruction *inst;
1788
1789 if (n->Opcode == IR_CONT) {
1790 /* we need to execute the loop's tail code before doing CONT */
1791 assert(n->Parent);
1792 assert(n->Parent->Opcode == IR_LOOP);
1793 if (n->Parent->Children[1]) {
1794 /* emit tail code */
1795 if (emitInfo->EmitComments) {
1796 emit_comment(emitInfo, "continue - tail code:");
1797 }
1798 emit(emitInfo, n->Parent->Children[1]);
1799 }
1800 }
1801
1802 /* opcode selection */
1803 if (emitInfo->EmitHighLevelInstructions) {
1804 opcode = (n->Opcode == IR_CONT) ? OPCODE_CONT : OPCODE_BRK;
1805 }
1806 else {
1807 opcode = OPCODE_BRA;
1808 }
1809 n->InstLocation = emitInfo->prog->NumInstructions;
1810 inst = new_instruction(emitInfo, opcode);
1811 if (inst) {
1812 inst->DstReg.CondMask = COND_TR; /* always true */
1813 }
1814 return inst;
1815 }
1816
1817
1818 /**
1819 * Conditional "continue" or "break" statement.
1820 * Either OPCODE_CONT, OPCODE_BRK or OPCODE_BRA will be emitted.
1821 */
1822 static struct prog_instruction *
1823 emit_cont_break_if_true(slang_emit_info *emitInfo, slang_ir_node *n)
1824 {
1825 struct prog_instruction *inst;
1826
1827 assert(n->Opcode == IR_CONT_IF_TRUE ||
1828 n->Opcode == IR_BREAK_IF_TRUE);
1829
1830 /* evaluate condition expr, setting cond codes */
1831 inst = emit(emitInfo, n->Children[0]);
1832 if (emitInfo->EmitCondCodes) {
1833 assert(inst);
1834 inst->CondUpdate = GL_TRUE;
1835 }
1836
1837 n->InstLocation = emitInfo->prog->NumInstructions;
1838
1839 /* opcode selection */
1840 if (emitInfo->EmitHighLevelInstructions) {
1841 const gl_inst_opcode opcode
1842 = (n->Opcode == IR_CONT_IF_TRUE) ? OPCODE_CONT : OPCODE_BRK;
1843 if (emitInfo->EmitCondCodes) {
1844 /* Get the writemask from the previous instruction which set
1845 * the condcodes. Use that writemask as the CondSwizzle.
1846 */
1847 const GLuint condWritemask = inst->DstReg.WriteMask;
1848 inst = new_instruction(emitInfo, opcode);
1849 if (inst) {
1850 inst->DstReg.CondMask = COND_NE;
1851 inst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1852 }
1853 return inst;
1854 }
1855 else {
1856 /* IF reg
1857 * BRK/CONT;
1858 * ENDIF
1859 */
1860 GLint ifInstLoc;
1861 ifInstLoc = emitInfo->prog->NumInstructions;
1862 inst = emit_instruction(emitInfo, OPCODE_IF,
1863 NULL, /* dest */
1864 n->Children[0]->Store,
1865 NULL,
1866 NULL);
1867 n->InstLocation = emitInfo->prog->NumInstructions;
1868
1869 inst = new_instruction(emitInfo, opcode);
1870 if (!inst) {
1871 return NULL;
1872 }
1873 inst = new_instruction(emitInfo, OPCODE_ENDIF);
1874 if (!inst) {
1875 return NULL;
1876 }
1877
1878 emitInfo->prog->Instructions[ifInstLoc].BranchTarget
1879 = emitInfo->prog->NumInstructions;
1880 return inst;
1881 }
1882 }
1883 else {
1884 const GLuint condWritemask = inst->DstReg.WriteMask;
1885 assert(emitInfo->EmitCondCodes);
1886 inst = new_instruction(emitInfo, OPCODE_BRA);
1887 if (inst) {
1888 inst->DstReg.CondMask = COND_NE;
1889 inst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1890 }
1891 return inst;
1892 }
1893 }
1894
1895
1896 /**
1897 * Return the size of a swizzle mask given that some swizzle components
1898 * may be NIL/undefined. For example:
1899 * swizzle_size(".zzxx") = 4
1900 * swizzle_size(".xy??") = 2
1901 * swizzle_size(".w???") = 1
1902 */
1903 static GLuint
1904 swizzle_size(GLuint swizzle)
1905 {
1906 GLuint i;
1907 for (i = 0; i < 4; i++) {
1908 if (GET_SWZ(swizzle, i) == SWIZZLE_NIL)
1909 return i;
1910 }
1911 return 4;
1912 }
1913
1914
1915 static struct prog_instruction *
1916 emit_swizzle(slang_emit_info *emitInfo, slang_ir_node *n)
1917 {
1918 struct prog_instruction *inst;
1919
1920 inst = emit(emitInfo, n->Children[0]);
1921
1922 if (!n->Store->Parent) {
1923 /* this covers a case such as "(b ? p : q).x" */
1924 n->Store->Parent = n->Children[0]->Store;
1925 assert(n->Store->Parent);
1926 }
1927
1928 {
1929 const GLuint swizzle = n->Store->Swizzle;
1930 /* new storage is parent storage with updated Swizzle + Size fields */
1931 _slang_copy_ir_storage(n->Store, n->Store->Parent);
1932 /* Apply this node's swizzle to parent's storage */
1933 n->Store->Swizzle = _slang_swizzle_swizzle(n->Store->Swizzle, swizzle);
1934 /* Update size */
1935 n->Store->Size = swizzle_size(n->Store->Swizzle);
1936 }
1937
1938 assert(!n->Store->Parent);
1939 assert(n->Store->Index >= 0);
1940
1941 return inst;
1942 }
1943
1944
1945 /**
1946 * Dereference array element: element == array[index]
1947 * This basically involves emitting code for computing the array index
1948 * and updating the node/element's storage info.
1949 */
1950 static struct prog_instruction *
1951 emit_array_element(slang_emit_info *emitInfo, slang_ir_node *n)
1952 {
1953 slang_ir_storage *arrayStore, *indexStore;
1954 const int elemSize = n->Store->Size; /* number of floats */
1955 const GLint elemSizeVec = (elemSize + 3) / 4; /* number of vec4 */
1956 struct prog_instruction *inst;
1957
1958 assert(n->Opcode == IR_ELEMENT);
1959 assert(elemSize > 0);
1960
1961 /* special case for built-in state variables, like light state */
1962 {
1963 slang_ir_storage *root = n->Store;
1964 assert(!root->Parent);
1965 while (root->Parent)
1966 root = root->Parent;
1967
1968 if (root->File == PROGRAM_STATE_VAR) {
1969 GLboolean direct;
1970 GLint index =
1971 _slang_alloc_statevar(n, emitInfo->prog->Parameters, &direct);
1972 if (index < 0) {
1973 /* error */
1974 return NULL;
1975 }
1976 if (direct) {
1977 n->Store->Index = index;
1978 return NULL; /* all done */
1979 }
1980 }
1981 }
1982
1983 /* do codegen for array itself */
1984 emit(emitInfo, n->Children[0]);
1985 arrayStore = n->Children[0]->Store;
1986
1987 /* The initial array element storage is the array's storage,
1988 * then modified below.
1989 */
1990 _slang_copy_ir_storage(n->Store, arrayStore);
1991
1992
1993 if (n->Children[1]->Opcode == IR_FLOAT) {
1994 /* Constant array index */
1995 const GLint element = (GLint) n->Children[1]->Value[0];
1996
1997 /* this element's storage is the array's storage, plus constant offset */
1998 n->Store->Index += elemSizeVec * element;
1999 }
2000 else {
2001 /* Variable array index */
2002
2003 /* do codegen for array index expression */
2004 emit(emitInfo, n->Children[1]);
2005 indexStore = n->Children[1]->Store;
2006
2007 if (indexStore->IsIndirect) {
2008 /* need to put the array index into a temporary since we can't
2009 * directly support a[b[i]] constructs.
2010 */
2011
2012
2013 /*indexStore = tempstore();*/
2014 }
2015
2016
2017 if (elemSize > 4) {
2018 /* need to multiply array index by array element size */
2019 struct prog_instruction *inst;
2020 slang_ir_storage *indexTemp;
2021 slang_ir_storage elemSizeStore;
2022
2023 /* allocate 1 float indexTemp */
2024 indexTemp = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, 1);
2025 _slang_alloc_temp(emitInfo->vt, indexTemp);
2026
2027 /* allocate a constant containing the element size */
2028 constant_to_storage(emitInfo, (float) elemSizeVec, &elemSizeStore);
2029
2030 /* multiply array index by element size */
2031 inst = emit_instruction(emitInfo,
2032 OPCODE_MUL,
2033 indexTemp, /* dest */
2034 indexStore, /* the index */
2035 &elemSizeStore,
2036 NULL);
2037
2038 indexStore = indexTemp;
2039 }
2040
2041 if (arrayStore->IsIndirect) {
2042 /* ex: in a[i][j], a[i] (the arrayStore) is indirect */
2043 /* Need to add indexStore to arrayStore->Indirect store */
2044 slang_ir_storage indirectArray;
2045 slang_ir_storage *indexTemp;
2046
2047 _slang_init_ir_storage(&indirectArray,
2048 arrayStore->IndirectFile,
2049 arrayStore->IndirectIndex,
2050 1,
2051 arrayStore->IndirectSwizzle);
2052
2053 /* allocate 1 float indexTemp */
2054 indexTemp = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, 1);
2055 _slang_alloc_temp(emitInfo->vt, indexTemp);
2056
2057 inst = emit_instruction(emitInfo,
2058 OPCODE_ADD,
2059 indexTemp, /* dest */
2060 indexStore, /* the index */
2061 &indirectArray, /* indirect array base */
2062 NULL);
2063
2064 indexStore = indexTemp;
2065 }
2066
2067 /* update the array element storage info */
2068 n->Store->IsIndirect = GL_TRUE;
2069 n->Store->IndirectFile = indexStore->File;
2070 n->Store->IndirectIndex = indexStore->Index;
2071 n->Store->IndirectSwizzle = indexStore->Swizzle;
2072 }
2073
2074 n->Store->Size = elemSize;
2075 n->Store->Swizzle = _slang_var_swizzle(elemSize, 0);
2076
2077 return NULL; /* no instruction */
2078 }
2079
2080
2081 /**
2082 * Resolve storage for accessing a structure field.
2083 */
2084 static struct prog_instruction *
2085 emit_struct_field(slang_emit_info *emitInfo, slang_ir_node *n)
2086 {
2087 slang_ir_storage *root = n->Store;
2088 GLint fieldOffset, fieldSize;
2089
2090 assert(n->Opcode == IR_FIELD);
2091
2092 assert(!root->Parent);
2093 while (root->Parent)
2094 root = root->Parent;
2095
2096 /* If this is the field of a state var, allocate constant/uniform
2097 * storage for it now if we haven't already.
2098 * Note that we allocate storage (uniform/constant slots) for state
2099 * variables here rather than at declaration time so we only allocate
2100 * space for the ones that we actually use!
2101 */
2102 if (root->File == PROGRAM_STATE_VAR) {
2103 GLboolean direct;
2104 GLint index = _slang_alloc_statevar(n, emitInfo->prog->Parameters, &direct);
2105 if (index < 0) {
2106 slang_info_log_error(emitInfo->log, "Error parsing state variable");
2107 return NULL;
2108 }
2109 if (direct) {
2110 root->Index = index;
2111 return NULL; /* all done */
2112 }
2113 }
2114
2115 /* do codegen for struct */
2116 emit(emitInfo, n->Children[0]);
2117 assert(n->Children[0]->Store->Index >= 0);
2118
2119
2120 fieldOffset = n->Store->Index;
2121 fieldSize = n->Store->Size;
2122
2123 _slang_copy_ir_storage(n->Store, n->Children[0]->Store);
2124
2125 n->Store->Index = n->Children[0]->Store->Index + fieldOffset / 4;
2126 n->Store->Size = fieldSize;
2127
2128 switch (fieldSize) {
2129 case 1:
2130 {
2131 GLint swz = fieldOffset % 4;
2132 n->Store->Swizzle = MAKE_SWIZZLE4(swz, swz, swz, swz);
2133 }
2134 break;
2135 case 2:
2136 n->Store->Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
2137 SWIZZLE_NIL, SWIZZLE_NIL);
2138 break;
2139 case 3:
2140 n->Store->Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
2141 SWIZZLE_Z, SWIZZLE_NIL);
2142 break;
2143 default:
2144 n->Store->Swizzle = SWIZZLE_XYZW;
2145 }
2146
2147 assert(n->Store->Index >= 0);
2148
2149 return NULL; /* no instruction */
2150 }
2151
2152
2153 /**
2154 * Emit code for a variable declaration.
2155 * This usually doesn't result in any code generation, but just
2156 * memory allocation.
2157 */
2158 static struct prog_instruction *
2159 emit_var_decl(slang_emit_info *emitInfo, slang_ir_node *n)
2160 {
2161 assert(n->Store);
2162 assert(n->Store->File != PROGRAM_UNDEFINED);
2163 assert(n->Store->Size > 0);
2164 /*assert(n->Store->Index < 0);*/
2165
2166 if (!n->Var || n->Var->isTemp) {
2167 /* a nameless/temporary variable, will be freed after first use */
2168 /*NEW*/
2169 if (n->Store->Index < 0 && !_slang_alloc_temp(emitInfo->vt, n->Store)) {
2170 slang_info_log_error(emitInfo->log,
2171 "Ran out of registers, too many temporaries");
2172 return NULL;
2173 }
2174 }
2175 else {
2176 /* a regular variable */
2177 _slang_add_variable(emitInfo->vt, n->Var);
2178 if (!_slang_alloc_var(emitInfo->vt, n->Store)) {
2179 slang_info_log_error(emitInfo->log,
2180 "Ran out of registers, too many variables");
2181 return NULL;
2182 }
2183 /*
2184 printf("IR_VAR_DECL %s %d store %p\n",
2185 (char*) n->Var->a_name, n->Store->Index, (void*) n->Store);
2186 */
2187 assert(n->Var->store == n->Store);
2188 }
2189 if (emitInfo->EmitComments) {
2190 /* emit NOP with comment describing the variable's storage location */
2191 char s[1000];
2192 sprintf(s, "TEMP[%d]%s = variable %s (size %d)",
2193 n->Store->Index,
2194 _mesa_swizzle_string(n->Store->Swizzle, 0, GL_FALSE),
2195 (n->Var ? (char *) n->Var->a_name : "anonymous"),
2196 n->Store->Size);
2197 emit_comment(emitInfo, s);
2198 }
2199 return NULL;
2200 }
2201
2202
2203 /**
2204 * Emit code for a reference to a variable.
2205 * Actually, no code is generated but we may do some memory allocation.
2206 * In particular, state vars (uniforms) are allocated on an as-needed basis.
2207 */
2208 static struct prog_instruction *
2209 emit_var_ref(slang_emit_info *emitInfo, slang_ir_node *n)
2210 {
2211 assert(n->Store);
2212 assert(n->Store->File != PROGRAM_UNDEFINED);
2213
2214 if (n->Store->File == PROGRAM_STATE_VAR && n->Store->Index < 0) {
2215 GLboolean direct;
2216 GLint index = _slang_alloc_statevar(n, emitInfo->prog->Parameters, &direct);
2217 if (index < 0) {
2218 /* error */
2219 char s[100];
2220 /* XXX isn't this really an out of memory/resources error? */
2221 _mesa_snprintf(s, sizeof(s), "Undefined variable '%s'",
2222 (char *) n->Var->a_name);
2223 slang_info_log_error(emitInfo->log, s);
2224 return NULL;
2225 }
2226
2227 n->Store->Index = index;
2228 }
2229 else if (n->Store->File == PROGRAM_UNIFORM ||
2230 n->Store->File == PROGRAM_SAMPLER) {
2231 /* mark var as used */
2232 _mesa_use_uniform(emitInfo->prog->Parameters, (char *) n->Var->a_name);
2233 }
2234 else if (n->Store->File == PROGRAM_INPUT) {
2235 assert(n->Store->Index >= 0);
2236 emitInfo->prog->InputsRead |= (1 << n->Store->Index);
2237 }
2238
2239 if (n->Store->Index < 0) {
2240 /* probably ran out of registers */
2241 return NULL;
2242 }
2243 assert(n->Store->Size > 0);
2244
2245 return NULL;
2246 }
2247
2248
2249 static struct prog_instruction *
2250 emit(slang_emit_info *emitInfo, slang_ir_node *n)
2251 {
2252 struct prog_instruction *inst;
2253 if (!n)
2254 return NULL;
2255
2256 if (emitInfo->log->error_flag) {
2257 return NULL;
2258 }
2259
2260 if (n->Comment) {
2261 inst = new_instruction(emitInfo, OPCODE_NOP);
2262 if (inst) {
2263 inst->Comment = _mesa_strdup(n->Comment);
2264 }
2265 inst = NULL;
2266 }
2267
2268 switch (n->Opcode) {
2269 case IR_SEQ:
2270 /* sequence of two sub-trees */
2271 assert(n->Children[0]);
2272 assert(n->Children[1]);
2273 emit(emitInfo, n->Children[0]);
2274 if (emitInfo->log->error_flag)
2275 return NULL;
2276 inst = emit(emitInfo, n->Children[1]);
2277 #if 0
2278 assert(!n->Store);
2279 #endif
2280 n->Store = n->Children[1]->Store;
2281 return inst;
2282
2283 case IR_SCOPE:
2284 /* new variable scope */
2285 _slang_push_var_table(emitInfo->vt);
2286 inst = emit(emitInfo, n->Children[0]);
2287 _slang_pop_var_table(emitInfo->vt);
2288 return inst;
2289
2290 case IR_VAR_DECL:
2291 /* Variable declaration - allocate a register for it */
2292 inst = emit_var_decl(emitInfo, n);
2293 return inst;
2294
2295 case IR_VAR:
2296 /* Reference to a variable
2297 * Storage should have already been resolved/allocated.
2298 */
2299 return emit_var_ref(emitInfo, n);
2300
2301 case IR_ELEMENT:
2302 return emit_array_element(emitInfo, n);
2303 case IR_FIELD:
2304 return emit_struct_field(emitInfo, n);
2305 case IR_SWIZZLE:
2306 return emit_swizzle(emitInfo, n);
2307
2308 /* Simple arithmetic */
2309 /* unary */
2310 case IR_MOVE:
2311 case IR_RSQ:
2312 case IR_RCP:
2313 case IR_FLOOR:
2314 case IR_FRAC:
2315 case IR_F_TO_I:
2316 case IR_I_TO_F:
2317 case IR_ABS:
2318 case IR_SIN:
2319 case IR_COS:
2320 case IR_DDX:
2321 case IR_DDY:
2322 case IR_EXP:
2323 case IR_EXP2:
2324 case IR_LOG2:
2325 case IR_NOISE1:
2326 case IR_NOISE2:
2327 case IR_NOISE3:
2328 case IR_NOISE4:
2329 case IR_NRM4:
2330 case IR_NRM3:
2331 /* binary */
2332 case IR_ADD:
2333 case IR_SUB:
2334 case IR_MUL:
2335 case IR_DOT4:
2336 case IR_DOT3:
2337 case IR_DOT2:
2338 case IR_CROSS:
2339 case IR_MIN:
2340 case IR_MAX:
2341 case IR_SEQUAL:
2342 case IR_SNEQUAL:
2343 case IR_SGE:
2344 case IR_SGT:
2345 case IR_SLE:
2346 case IR_SLT:
2347 case IR_POW:
2348 /* trinary operators */
2349 case IR_LRP:
2350 case IR_CMP:
2351 return emit_arith(emitInfo, n);
2352
2353 case IR_EQUAL:
2354 case IR_NOTEQUAL:
2355 return emit_compare(emitInfo, n);
2356
2357 case IR_CLAMP:
2358 return emit_clamp(emitInfo, n);
2359 case IR_TEX:
2360 case IR_TEXB:
2361 case IR_TEXP:
2362 case IR_TEX_SH:
2363 case IR_TEXB_SH:
2364 case IR_TEXP_SH:
2365 return emit_tex(emitInfo, n);
2366 case IR_NEG:
2367 return emit_negation(emitInfo, n);
2368 case IR_FLOAT:
2369 /* find storage location for this float constant */
2370 n->Store->Index = _mesa_add_unnamed_constant(emitInfo->prog->Parameters,
2371 n->Value,
2372 n->Store->Size,
2373 &n->Store->Swizzle);
2374 if (n->Store->Index < 0) {
2375 slang_info_log_error(emitInfo->log, "Ran out of space for constants");
2376 return NULL;
2377 }
2378 return NULL;
2379
2380 case IR_COPY:
2381 return emit_copy(emitInfo, n);
2382
2383 case IR_COND:
2384 return emit_cond(emitInfo, n);
2385
2386 case IR_NOT:
2387 return emit_not(emitInfo, n);
2388
2389 case IR_LABEL:
2390 return emit_label(emitInfo, n);
2391
2392 case IR_KILL:
2393 return emit_kill(emitInfo);
2394
2395 case IR_CALL:
2396 /* new variable scope for subroutines/function calls */
2397 _slang_push_var_table(emitInfo->vt);
2398 inst = emit_fcall(emitInfo, n);
2399 _slang_pop_var_table(emitInfo->vt);
2400 return inst;
2401
2402 case IR_IF:
2403 return emit_if(emitInfo, n);
2404
2405 case IR_LOOP:
2406 return emit_loop(emitInfo, n);
2407 case IR_BREAK_IF_TRUE:
2408 case IR_CONT_IF_TRUE:
2409 return emit_cont_break_if_true(emitInfo, n);
2410 case IR_BREAK:
2411 /* fall-through */
2412 case IR_CONT:
2413 return emit_cont_break(emitInfo, n);
2414
2415 case IR_BEGIN_SUB:
2416 return new_instruction(emitInfo, OPCODE_BGNSUB);
2417 case IR_END_SUB:
2418 return new_instruction(emitInfo, OPCODE_ENDSUB);
2419 case IR_RETURN:
2420 return emit_return(emitInfo, n);
2421
2422 case IR_NOP:
2423 return NULL;
2424
2425 default:
2426 _mesa_problem(NULL, "Unexpected IR opcode in emit()\n");
2427 }
2428 return NULL;
2429 }
2430
2431
2432 /**
2433 * After code generation, any subroutines will be in separate program
2434 * objects. This function appends all the subroutines onto the main
2435 * program and resolves the linking of all the branch/call instructions.
2436 * XXX this logic should really be part of the linking process...
2437 */
2438 static void
2439 _slang_resolve_subroutines(slang_emit_info *emitInfo)
2440 {
2441 GET_CURRENT_CONTEXT(ctx);
2442 struct gl_program *mainP = emitInfo->prog;
2443 GLuint *subroutineLoc, i, total;
2444
2445 subroutineLoc
2446 = (GLuint *) _mesa_malloc(emitInfo->NumSubroutines * sizeof(GLuint));
2447
2448 /* total number of instructions */
2449 total = mainP->NumInstructions;
2450 for (i = 0; i < emitInfo->NumSubroutines; i++) {
2451 subroutineLoc[i] = total;
2452 total += emitInfo->Subroutines[i]->NumInstructions;
2453 }
2454
2455 /* adjust BranchTargets within the functions */
2456 for (i = 0; i < emitInfo->NumSubroutines; i++) {
2457 struct gl_program *sub = emitInfo->Subroutines[i];
2458 GLuint j;
2459 for (j = 0; j < sub->NumInstructions; j++) {
2460 struct prog_instruction *inst = sub->Instructions + j;
2461 if (inst->Opcode != OPCODE_CAL && inst->BranchTarget >= 0) {
2462 inst->BranchTarget += subroutineLoc[i];
2463 }
2464 }
2465 }
2466
2467 /* append subroutines' instructions after main's instructions */
2468 mainP->Instructions = _mesa_realloc_instructions(mainP->Instructions,
2469 mainP->NumInstructions,
2470 total);
2471 mainP->NumInstructions = total;
2472 for (i = 0; i < emitInfo->NumSubroutines; i++) {
2473 struct gl_program *sub = emitInfo->Subroutines[i];
2474 _mesa_copy_instructions(mainP->Instructions + subroutineLoc[i],
2475 sub->Instructions,
2476 sub->NumInstructions);
2477 /* delete subroutine code */
2478 sub->Parameters = NULL; /* prevent double-free */
2479 _mesa_reference_program(ctx, &emitInfo->Subroutines[i], NULL);
2480 }
2481
2482 /* free subroutine list */
2483 if (emitInfo->Subroutines) {
2484 _mesa_free(emitInfo->Subroutines);
2485 emitInfo->Subroutines = NULL;
2486 }
2487 emitInfo->NumSubroutines = 0;
2488
2489 /* Examine CAL instructions.
2490 * At this point, the BranchTarget field of the CAL instruction is
2491 * the number/id of the subroutine to call (an index into the
2492 * emitInfo->Subroutines list).
2493 * Translate that into an actual instruction location now.
2494 */
2495 for (i = 0; i < mainP->NumInstructions; i++) {
2496 struct prog_instruction *inst = mainP->Instructions + i;
2497 if (inst->Opcode == OPCODE_CAL) {
2498 const GLuint f = inst->BranchTarget;
2499 inst->BranchTarget = subroutineLoc[f];
2500 }
2501 }
2502
2503 _mesa_free(subroutineLoc);
2504 }
2505
2506
2507
2508 /**
2509 * Convert the IR tree into GPU instructions.
2510 * \param n root of IR tree
2511 * \param vt variable table
2512 * \param prog program to put GPU instructions into
2513 * \param pragmas controls codegen options
2514 * \param withEnd if true, emit END opcode at end
2515 * \param log log for emitting errors/warnings/info
2516 */
2517 GLboolean
2518 _slang_emit_code(slang_ir_node *n, slang_var_table *vt,
2519 struct gl_program *prog,
2520 const struct gl_sl_pragmas *pragmas,
2521 GLboolean withEnd,
2522 slang_info_log *log)
2523 {
2524 GET_CURRENT_CONTEXT(ctx);
2525 GLboolean success;
2526 slang_emit_info emitInfo;
2527 GLuint maxUniforms;
2528
2529 emitInfo.log = log;
2530 emitInfo.vt = vt;
2531 emitInfo.prog = prog;
2532 emitInfo.Subroutines = NULL;
2533 emitInfo.NumSubroutines = 0;
2534 emitInfo.MaxInstructions = prog->NumInstructions;
2535
2536 emitInfo.EmitHighLevelInstructions = ctx->Shader.EmitHighLevelInstructions;
2537 emitInfo.EmitCondCodes = ctx->Shader.EmitCondCodes;
2538 emitInfo.EmitComments = ctx->Shader.EmitComments || pragmas->Debug;
2539 emitInfo.EmitBeginEndSub = GL_TRUE;
2540
2541 if (!emitInfo.EmitCondCodes) {
2542 emitInfo.EmitHighLevelInstructions = GL_TRUE;
2543 }
2544
2545 /* Check uniform/constant limits */
2546 if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
2547 maxUniforms = ctx->Const.FragmentProgram.MaxUniformComponents / 4;
2548 }
2549 else {
2550 assert(prog->Target == GL_VERTEX_PROGRAM_ARB);
2551 maxUniforms = ctx->Const.VertexProgram.MaxUniformComponents / 4;
2552 }
2553 if (prog->Parameters->NumParameters > maxUniforms) {
2554 slang_info_log_error(log, "Constant/uniform register limit exceeded "
2555 "(max=%u vec4)", maxUniforms);
2556
2557 return GL_FALSE;
2558 }
2559
2560 (void) emit(&emitInfo, n);
2561
2562 /* finish up by adding the END opcode to program */
2563 if (withEnd) {
2564 struct prog_instruction *inst;
2565 inst = new_instruction(&emitInfo, OPCODE_END);
2566 if (!inst) {
2567 return GL_FALSE;
2568 }
2569 }
2570
2571 _slang_resolve_subroutines(&emitInfo);
2572
2573 success = GL_TRUE;
2574
2575 #if 0
2576 printf("*********** End emit code (%u inst):\n", prog->NumInstructions);
2577 _mesa_print_program(prog);
2578 _mesa_print_program_parameters(ctx,prog);
2579 #endif
2580
2581 return success;
2582 }