nv50: fix build-predicate function
[mesa.git] / src / mesa / slang / slang_emit.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2005-2008 Brian Paul All Rights Reserved.
5 * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file slang_emit.c
27 * Emit program instructions (PI code) from IR trees.
28 * \author Brian Paul
29 */
30
31 /***
32 *** NOTES
33 ***
34 *** To emit GPU instructions, we basically just do an in-order traversal
35 *** of the IR tree.
36 ***/
37
38
39 #include "main/imports.h"
40 #include "main/context.h"
41 #include "program/program.h"
42 #include "program/prog_instruction.h"
43 #include "program/prog_parameter.h"
44 #include "program/prog_print.h"
45 #include "slang_builtin.h"
46 #include "slang_emit.h"
47 #include "slang_mem.h"
48
49
50 #define PEEPHOLE_OPTIMIZATIONS 1
51 #define ANNOTATE 0
52
53
54 typedef struct
55 {
56 slang_info_log *log;
57 slang_var_table *vt;
58 struct gl_program *prog;
59 struct gl_program **Subroutines;
60 GLuint NumSubroutines;
61
62 GLuint MaxInstructions; /**< size of prog->Instructions[] buffer */
63
64 GLboolean UnresolvedFunctions;
65
66 /* code-gen options */
67 GLboolean EmitHighLevelInstructions;
68 GLboolean EmitCondCodes;
69 GLboolean EmitComments;
70 GLboolean EmitBeginEndSub; /* XXX TEMPORARY */
71 } slang_emit_info;
72
73
74
75 static struct gl_program *
76 new_subroutine(slang_emit_info *emitInfo, GLuint *id)
77 {
78 GET_CURRENT_CONTEXT(ctx);
79 const GLuint n = emitInfo->NumSubroutines;
80
81 emitInfo->Subroutines = (struct gl_program **)
82 _mesa_realloc(emitInfo->Subroutines,
83 n * sizeof(struct gl_program *),
84 (n + 1) * sizeof(struct gl_program *));
85 emitInfo->Subroutines[n] = ctx->Driver.NewProgram(ctx, emitInfo->prog->Target, 0);
86 emitInfo->Subroutines[n]->Parameters = emitInfo->prog->Parameters;
87 emitInfo->NumSubroutines++;
88 *id = n;
89 return emitInfo->Subroutines[n];
90 }
91
92
93 /**
94 * Convert a writemask to a swizzle. Used for testing cond codes because
95 * we only want to test the cond code component(s) that was set by the
96 * previous instruction.
97 */
98 static GLuint
99 writemask_to_swizzle(GLuint writemask)
100 {
101 if (writemask == WRITEMASK_X)
102 return SWIZZLE_XXXX;
103 if (writemask == WRITEMASK_Y)
104 return SWIZZLE_YYYY;
105 if (writemask == WRITEMASK_Z)
106 return SWIZZLE_ZZZZ;
107 if (writemask == WRITEMASK_W)
108 return SWIZZLE_WWWW;
109 return SWIZZLE_XYZW; /* shouldn't be hit */
110 }
111
112
113 /**
114 * Convert a swizzle mask to a writemask.
115 * Note that the slang_ir_storage->Swizzle field can represent either a
116 * swizzle mask or a writemask, depending on how it's used. For example,
117 * when we parse "direction.yz" alone, we don't know whether .yz is a
118 * writemask or a swizzle. In this case, we encode ".yz" in store->Swizzle
119 * as a swizzle mask (.yz?? actually). Later, if direction.yz is used as
120 * an R-value, we use store->Swizzle as-is. Otherwise, if direction.yz is
121 * used as an L-value, we convert it to a writemask.
122 */
123 static GLuint
124 swizzle_to_writemask(GLuint swizzle)
125 {
126 GLuint i, writemask = 0x0;
127 for (i = 0; i < 4; i++) {
128 GLuint swz = GET_SWZ(swizzle, i);
129 if (swz <= SWIZZLE_W) {
130 writemask |= (1 << swz);
131 }
132 }
133 return writemask;
134 }
135
136
137 /**
138 * Swizzle a swizzle (function composition).
139 * That is, return swz2(swz1), or said another way: swz1.szw2
140 * Example: swizzle_swizzle(".zwxx", ".xxyw") yields ".zzwx"
141 */
142 GLuint
143 _slang_swizzle_swizzle(GLuint swz1, GLuint swz2)
144 {
145 GLuint i, swz, s[4];
146 for (i = 0; i < 4; i++) {
147 GLuint c = GET_SWZ(swz2, i);
148 if (c <= SWIZZLE_W)
149 s[i] = GET_SWZ(swz1, c);
150 else
151 s[i] = c;
152 }
153 swz = MAKE_SWIZZLE4(s[0], s[1], s[2], s[3]);
154 return swz;
155 }
156
157
158 /**
159 * Return the default swizzle mask for accessing a variable of the
160 * given size (in floats). If size = 1, comp is used to identify
161 * which component [0..3] of the register holds the variable.
162 */
163 GLuint
164 _slang_var_swizzle(GLint size, GLint comp)
165 {
166 switch (size) {
167 case 1:
168 return MAKE_SWIZZLE4(comp, SWIZZLE_NIL, SWIZZLE_NIL, SWIZZLE_NIL);
169 case 2:
170 return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_NIL, SWIZZLE_NIL);
171 case 3:
172 return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_NIL);
173 default:
174 return SWIZZLE_XYZW;
175 }
176 }
177
178
179
180 /**
181 * Allocate storage for the given node (if it hasn't already been allocated).
182 *
183 * Typically this is temporary storage for an intermediate result (such as
184 * for a multiply or add, etc).
185 *
186 * If n->Store does not exist it will be created and will be of the size
187 * specified by defaultSize.
188 */
189 static GLboolean
190 alloc_node_storage(slang_emit_info *emitInfo, slang_ir_node *n,
191 GLint defaultSize)
192 {
193 assert(!n->Var);
194 if (!n->Store) {
195 assert(defaultSize > 0);
196 n->Store = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, defaultSize);
197 if (!n->Store) {
198 return GL_FALSE;
199 }
200 }
201
202 /* now allocate actual register(s). I.e. set n->Store->Index >= 0 */
203 if (n->Store->Index < 0) {
204 if (!_slang_alloc_temp(emitInfo->vt, n->Store)) {
205 slang_info_log_error(emitInfo->log,
206 "Ran out of registers, too many temporaries");
207 _slang_free(n->Store);
208 n->Store = NULL;
209 return GL_FALSE;
210 }
211 }
212 return GL_TRUE;
213 }
214
215
216 /**
217 * Free temporary storage, if n->Store is, in fact, temp storage.
218 * Otherwise, no-op.
219 */
220 static void
221 free_node_storage(slang_var_table *vt, slang_ir_node *n)
222 {
223 if (n->Store->File == PROGRAM_TEMPORARY &&
224 n->Store->Index >= 0 &&
225 n->Opcode != IR_SWIZZLE) {
226 if (_slang_is_temp(vt, n->Store)) {
227 _slang_free_temp(vt, n->Store);
228 n->Store->Index = -1;
229 n->Store = NULL; /* XXX this may not be needed */
230 }
231 }
232 }
233
234
235 /**
236 * Helper function to allocate a short-term temporary.
237 * Free it with _slang_free_temp().
238 */
239 static GLboolean
240 alloc_local_temp(slang_emit_info *emitInfo, slang_ir_storage *temp, GLint size)
241 {
242 assert(size >= 1);
243 assert(size <= 4);
244 memset(temp, 0, sizeof(*temp));
245 temp->Size = size;
246 temp->File = PROGRAM_TEMPORARY;
247 temp->Index = -1;
248 return _slang_alloc_temp(emitInfo->vt, temp);
249 }
250
251
252 /**
253 * Remove any SWIZZLE_NIL terms from given swizzle mask.
254 * For a swizzle like .z??? generate .zzzz (replicate single component).
255 * Else, for .wx?? generate .wxzw (insert default component for the position).
256 */
257 static GLuint
258 fix_swizzle(GLuint swizzle)
259 {
260 GLuint c0 = GET_SWZ(swizzle, 0),
261 c1 = GET_SWZ(swizzle, 1),
262 c2 = GET_SWZ(swizzle, 2),
263 c3 = GET_SWZ(swizzle, 3);
264 if (c1 == SWIZZLE_NIL && c2 == SWIZZLE_NIL && c3 == SWIZZLE_NIL) {
265 /* smear first component across all positions */
266 c1 = c2 = c3 = c0;
267 }
268 else {
269 /* insert default swizzle components */
270 if (c0 == SWIZZLE_NIL)
271 c0 = SWIZZLE_X;
272 if (c1 == SWIZZLE_NIL)
273 c1 = SWIZZLE_Y;
274 if (c2 == SWIZZLE_NIL)
275 c2 = SWIZZLE_Z;
276 if (c3 == SWIZZLE_NIL)
277 c3 = SWIZZLE_W;
278 }
279 return MAKE_SWIZZLE4(c0, c1, c2, c3);
280 }
281
282
283
284 /**
285 * Convert IR storage to an instruction dst register.
286 */
287 static void
288 storage_to_dst_reg(struct prog_dst_register *dst, const slang_ir_storage *st)
289 {
290 const GLboolean relAddr = st->RelAddr;
291 const GLint size = st->Size;
292 GLint index = st->Index;
293 GLuint swizzle = st->Swizzle;
294
295 assert(index >= 0);
296 /* if this is storage relative to some parent storage, walk up the tree */
297 while (st->Parent) {
298 st = st->Parent;
299 assert(st->Index >= 0);
300 index += st->Index;
301 swizzle = _slang_swizzle_swizzle(st->Swizzle, swizzle);
302 }
303
304 assert(st->File != PROGRAM_UNDEFINED);
305 dst->File = st->File;
306
307 assert(index >= 0);
308 dst->Index = index;
309
310 assert(size >= 1);
311 assert(size <= 4);
312
313 if (swizzle != SWIZZLE_XYZW) {
314 dst->WriteMask = swizzle_to_writemask(swizzle);
315 }
316 else {
317 switch (size) {
318 case 1:
319 dst->WriteMask = WRITEMASK_X << GET_SWZ(st->Swizzle, 0);
320 break;
321 case 2:
322 dst->WriteMask = WRITEMASK_XY;
323 break;
324 case 3:
325 dst->WriteMask = WRITEMASK_XYZ;
326 break;
327 case 4:
328 dst->WriteMask = WRITEMASK_XYZW;
329 break;
330 default:
331 ; /* error would have been caught above */
332 }
333 }
334
335 dst->RelAddr = relAddr;
336 }
337
338
339 /**
340 * Convert IR storage to an instruction src register.
341 */
342 static void
343 storage_to_src_reg(struct prog_src_register *src, const slang_ir_storage *st)
344 {
345 const GLboolean relAddr = st->RelAddr;
346 GLint index = st->Index;
347 GLuint swizzle = st->Swizzle;
348
349 /* if this is storage relative to some parent storage, walk up the tree */
350 assert(index >= 0);
351 while (st->Parent) {
352 st = st->Parent;
353 if (st->Index < 0) {
354 /* an error should have been reported already */
355 return;
356 }
357 assert(st->Index >= 0);
358 index += st->Index;
359 swizzle = _slang_swizzle_swizzle(fix_swizzle(st->Swizzle), swizzle);
360 }
361
362 assert(st->File >= 0);
363 #if 1 /* XXX temporary */
364 if (st->File == PROGRAM_UNDEFINED) {
365 slang_ir_storage *st0 = (slang_ir_storage *) st;
366 st0->File = PROGRAM_TEMPORARY;
367 }
368 #endif
369 assert(st->File < PROGRAM_FILE_MAX);
370 src->File = st->File;
371
372 assert(index >= 0);
373 src->Index = index;
374
375 swizzle = fix_swizzle(swizzle);
376 assert(GET_SWZ(swizzle, 0) <= SWIZZLE_W);
377 assert(GET_SWZ(swizzle, 1) <= SWIZZLE_W);
378 assert(GET_SWZ(swizzle, 2) <= SWIZZLE_W);
379 assert(GET_SWZ(swizzle, 3) <= SWIZZLE_W);
380 src->Swizzle = swizzle;
381
382 src->HasIndex2 = st->Is2D;
383 src->Index2 = st->Index2;
384
385 src->RelAddr = relAddr;
386 }
387
388
389 /*
390 * Setup storage pointing to a scalar constant/literal.
391 */
392 static void
393 constant_to_storage(slang_emit_info *emitInfo,
394 GLfloat val,
395 slang_ir_storage *store)
396 {
397 GLuint swizzle;
398 GLint reg;
399 GLfloat value[4];
400
401 value[0] = val;
402 reg = _mesa_add_unnamed_constant(emitInfo->prog->Parameters,
403 value, 1, &swizzle);
404
405 memset(store, 0, sizeof(*store));
406 store->File = PROGRAM_CONSTANT;
407 store->Index = reg;
408 store->Swizzle = swizzle;
409 }
410
411
412 /**
413 * Add new instruction at end of given program.
414 * \param prog the program to append instruction onto
415 * \param opcode opcode for the new instruction
416 * \return pointer to the new instruction
417 */
418 static struct prog_instruction *
419 new_instruction(slang_emit_info *emitInfo, gl_inst_opcode opcode)
420 {
421 struct gl_program *prog = emitInfo->prog;
422 struct prog_instruction *inst;
423
424 #if 0
425 /* print prev inst */
426 if (prog->NumInstructions > 0) {
427 _mesa_print_instruction(prog->Instructions + prog->NumInstructions - 1);
428 }
429 #endif
430 assert(prog->NumInstructions <= emitInfo->MaxInstructions);
431
432 if (prog->NumInstructions == emitInfo->MaxInstructions) {
433 /* grow the instruction buffer */
434 emitInfo->MaxInstructions += 20;
435 prog->Instructions =
436 _mesa_realloc_instructions(prog->Instructions,
437 prog->NumInstructions,
438 emitInfo->MaxInstructions);
439 if (!prog->Instructions) {
440 return NULL;
441 }
442 }
443
444 inst = prog->Instructions + prog->NumInstructions;
445 prog->NumInstructions++;
446 _mesa_init_instructions(inst, 1);
447 inst->Opcode = opcode;
448 inst->BranchTarget = -1; /* invalid */
449 /*
450 printf("New inst %d: %p %s\n", prog->NumInstructions-1,(void*)inst,
451 _mesa_opcode_string(inst->Opcode));
452 */
453 return inst;
454 }
455
456
457 static struct prog_instruction *
458 emit_arl_load(slang_emit_info *emitInfo,
459 gl_register_file file, GLint index, GLuint swizzle)
460 {
461 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_ARL);
462 if (inst) {
463 inst->SrcReg[0].File = file;
464 inst->SrcReg[0].Index = index;
465 inst->SrcReg[0].Swizzle = fix_swizzle(swizzle);
466 inst->DstReg.File = PROGRAM_ADDRESS;
467 inst->DstReg.Index = 0;
468 inst->DstReg.WriteMask = WRITEMASK_X;
469 }
470 return inst;
471 }
472
473
474 /**
475 * Emit a new instruction with given opcode, operands.
476 * At this point the instruction may have multiple indirect register
477 * loads/stores. We convert those into ARL loads and address-relative
478 * operands. See comments inside.
479 * At some point in the future we could directly emit indirectly addressed
480 * registers in Mesa GPU instructions.
481 */
482 static struct prog_instruction *
483 emit_instruction(slang_emit_info *emitInfo,
484 gl_inst_opcode opcode,
485 const slang_ir_storage *dst,
486 const slang_ir_storage *src0,
487 const slang_ir_storage *src1,
488 const slang_ir_storage *src2)
489 {
490 struct prog_instruction *inst;
491 GLuint numIndirect = 0;
492 const slang_ir_storage *src[3];
493 slang_ir_storage newSrc[3], newDst;
494 GLuint i;
495 GLboolean isTemp[3];
496
497 isTemp[0] = isTemp[1] = isTemp[2] = GL_FALSE;
498
499 src[0] = src0;
500 src[1] = src1;
501 src[2] = src2;
502
503 /* count up how many operands are indirect loads */
504 for (i = 0; i < 3; i++) {
505 if (src[i] && src[i]->IsIndirect)
506 numIndirect++;
507 }
508 if (dst && dst->IsIndirect)
509 numIndirect++;
510
511 /* Take special steps for indirect register loads.
512 * If we had multiple address registers this would be simpler.
513 * For example, this GLSL code:
514 * x[i] = y[j] + z[k];
515 * would translate into something like:
516 * ARL ADDR.x, i;
517 * ARL ADDR.y, j;
518 * ARL ADDR.z, k;
519 * ADD TEMP[ADDR.x+5], TEMP[ADDR.y+9], TEMP[ADDR.z+4];
520 * But since we currently only have one address register we have to do this:
521 * ARL ADDR.x, i;
522 * MOV t1, TEMP[ADDR.x+9];
523 * ARL ADDR.x, j;
524 * MOV t2, TEMP[ADDR.x+4];
525 * ARL ADDR.x, k;
526 * ADD TEMP[ADDR.x+5], t1, t2;
527 * The code here figures this out...
528 */
529 if (numIndirect > 0) {
530 for (i = 0; i < 3; i++) {
531 if (src[i] && src[i]->IsIndirect) {
532 /* load the ARL register with the indirect register */
533 emit_arl_load(emitInfo,
534 src[i]->IndirectFile,
535 src[i]->IndirectIndex,
536 src[i]->IndirectSwizzle);
537
538 if (numIndirect > 1) {
539 /* Need to load src[i] into a temporary register */
540 slang_ir_storage srcRelAddr;
541 alloc_local_temp(emitInfo, &newSrc[i], src[i]->Size);
542 isTemp[i] = GL_TRUE;
543
544 /* set RelAddr flag on src register */
545 srcRelAddr = *src[i];
546 srcRelAddr.RelAddr = GL_TRUE;
547 srcRelAddr.IsIndirect = GL_FALSE; /* not really needed */
548
549 /* MOV newSrc, srcRelAddr; */
550 inst = emit_instruction(emitInfo,
551 OPCODE_MOV,
552 &newSrc[i],
553 &srcRelAddr,
554 NULL,
555 NULL);
556 if (!inst) {
557 return NULL;
558 }
559
560 src[i] = &newSrc[i];
561 }
562 else {
563 /* just rewrite the src[i] storage to be ARL-relative */
564 newSrc[i] = *src[i];
565 newSrc[i].RelAddr = GL_TRUE;
566 newSrc[i].IsIndirect = GL_FALSE; /* not really needed */
567 src[i] = &newSrc[i];
568 }
569 }
570 }
571 }
572
573 /* Take special steps for indirect dest register write */
574 if (dst && dst->IsIndirect) {
575 /* load the ARL register with the indirect register */
576 emit_arl_load(emitInfo,
577 dst->IndirectFile,
578 dst->IndirectIndex,
579 dst->IndirectSwizzle);
580 newDst = *dst;
581 newDst.RelAddr = GL_TRUE;
582 newDst.IsIndirect = GL_FALSE;
583 dst = &newDst;
584 }
585
586 /* OK, emit the instruction and its dst, src regs */
587 inst = new_instruction(emitInfo, opcode);
588 if (!inst)
589 return NULL;
590
591 if (dst)
592 storage_to_dst_reg(&inst->DstReg, dst);
593
594 for (i = 0; i < 3; i++) {
595 if (src[i])
596 storage_to_src_reg(&inst->SrcReg[i], src[i]);
597 }
598
599 /* Free any temp registers that we allocated above */
600 for (i = 0; i < 3; i++) {
601 if (isTemp[i])
602 _slang_free_temp(emitInfo->vt, &newSrc[i]);
603 }
604
605 return inst;
606 }
607
608
609
610 /**
611 * Put a comment on the given instruction.
612 */
613 static void
614 inst_comment(struct prog_instruction *inst, const char *comment)
615 {
616 if (inst)
617 inst->Comment = _mesa_strdup(comment);
618 }
619
620
621
622 /**
623 * Return pointer to last instruction in program.
624 */
625 static struct prog_instruction *
626 prev_instruction(slang_emit_info *emitInfo)
627 {
628 struct gl_program *prog = emitInfo->prog;
629 if (prog->NumInstructions == 0)
630 return NULL;
631 else
632 return prog->Instructions + prog->NumInstructions - 1;
633 }
634
635
636 static struct prog_instruction *
637 emit(slang_emit_info *emitInfo, slang_ir_node *n);
638
639
640 /**
641 * Return an annotation string for given node's storage.
642 */
643 static char *
644 storage_annotation(const slang_ir_node *n, const struct gl_program *prog)
645 {
646 #if ANNOTATE
647 const slang_ir_storage *st = n->Store;
648 static char s[100] = "";
649
650 if (!st)
651 return _mesa_strdup("");
652
653 switch (st->File) {
654 case PROGRAM_CONSTANT:
655 if (st->Index >= 0) {
656 const GLfloat *val = prog->Parameters->ParameterValues[st->Index];
657 if (st->Swizzle == SWIZZLE_NOOP)
658 _mesa_snprintf(s, sizeof(s), "{%g, %g, %g, %g}", val[0], val[1], val[2], val[3]);
659 else {
660 _mesa_snprintf(s, sizeof(s), "%g", val[GET_SWZ(st->Swizzle, 0)]);
661 }
662 }
663 break;
664 case PROGRAM_TEMPORARY:
665 if (n->Var)
666 _mesa_snprintf(s, sizeof(s), "%s", (char *) n->Var->a_name);
667 else
668 _mesa_snprintf(s, sizeof(s), "t[%d]", st->Index);
669 break;
670 case PROGRAM_STATE_VAR:
671 case PROGRAM_UNIFORM:
672 _mesa_snprintf(s, sizeof(s), "%s", prog->Parameters->Parameters[st->Index].Name);
673 break;
674 case PROGRAM_VARYING:
675 _mesa_snprintf(s, sizeof(s), "%s", prog->Varying->Parameters[st->Index].Name);
676 break;
677 case PROGRAM_INPUT:
678 _mesa_snprintf(s, sizeof(s), "input[%d]", st->Index);
679 break;
680 case PROGRAM_OUTPUT:
681 _mesa_snprintf(s, sizeof(s), "output[%d]", st->Index);
682 break;
683 default:
684 s[0] = 0;
685 }
686 return _mesa_strdup(s);
687 #else
688 return NULL;
689 #endif
690 }
691
692
693 /**
694 * Return an annotation string for an instruction.
695 */
696 static char *
697 instruction_annotation(gl_inst_opcode opcode, char *dstAnnot,
698 char *srcAnnot0, char *srcAnnot1, char *srcAnnot2)
699 {
700 #if ANNOTATE
701 const char *operator;
702 char *s;
703 int len = 50;
704
705 if (dstAnnot)
706 len += strlen(dstAnnot);
707 else
708 dstAnnot = _mesa_strdup("");
709
710 if (srcAnnot0)
711 len += strlen(srcAnnot0);
712 else
713 srcAnnot0 = _mesa_strdup("");
714
715 if (srcAnnot1)
716 len += strlen(srcAnnot1);
717 else
718 srcAnnot1 = _mesa_strdup("");
719
720 if (srcAnnot2)
721 len += strlen(srcAnnot2);
722 else
723 srcAnnot2 = _mesa_strdup("");
724
725 switch (opcode) {
726 case OPCODE_ADD:
727 operator = "+";
728 break;
729 case OPCODE_SUB:
730 operator = "-";
731 break;
732 case OPCODE_MUL:
733 operator = "*";
734 break;
735 case OPCODE_DP2:
736 operator = "DP2";
737 break;
738 case OPCODE_DP3:
739 operator = "DP3";
740 break;
741 case OPCODE_DP4:
742 operator = "DP4";
743 break;
744 case OPCODE_XPD:
745 operator = "XPD";
746 break;
747 case OPCODE_RSQ:
748 operator = "RSQ";
749 break;
750 case OPCODE_SGT:
751 operator = ">";
752 break;
753 default:
754 operator = ",";
755 }
756
757 s = (char *) malloc(len);
758 _mesa_snprintf(s, len, "%s = %s %s %s %s", dstAnnot,
759 srcAnnot0, operator, srcAnnot1, srcAnnot2);
760
761 free(dstAnnot);
762 free(srcAnnot0);
763 free(srcAnnot1);
764 free(srcAnnot2);
765
766 return s;
767 #else
768 return NULL;
769 #endif
770 }
771
772
773 /**
774 * Emit an instruction that's just a comment.
775 */
776 static struct prog_instruction *
777 emit_comment(slang_emit_info *emitInfo, const char *comment)
778 {
779 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_NOP);
780 if (inst) {
781 inst_comment(inst, comment);
782 }
783 return inst;
784 }
785
786
787 /**
788 * Generate code for a simple arithmetic instruction.
789 * Either 1, 2 or 3 operands.
790 */
791 static struct prog_instruction *
792 emit_arith(slang_emit_info *emitInfo, slang_ir_node *n)
793 {
794 const slang_ir_info *info = _slang_ir_info(n->Opcode);
795 struct prog_instruction *inst;
796 GLuint i;
797
798 assert(info);
799 assert(info->InstOpcode != OPCODE_NOP);
800
801 #if PEEPHOLE_OPTIMIZATIONS
802 /* Look for MAD opportunity */
803 if (info->NumParams == 2 &&
804 n->Opcode == IR_ADD && n->Children[0]->Opcode == IR_MUL) {
805 /* found pattern IR_ADD(IR_MUL(A, B), C) */
806 emit(emitInfo, n->Children[0]->Children[0]); /* A */
807 emit(emitInfo, n->Children[0]->Children[1]); /* B */
808 emit(emitInfo, n->Children[1]); /* C */
809 if (!alloc_node_storage(emitInfo, n, -1)) { /* dest */
810 return NULL;
811 }
812
813 inst = emit_instruction(emitInfo,
814 OPCODE_MAD,
815 n->Store,
816 n->Children[0]->Children[0]->Store,
817 n->Children[0]->Children[1]->Store,
818 n->Children[1]->Store);
819
820 free_node_storage(emitInfo->vt, n->Children[0]->Children[0]);
821 free_node_storage(emitInfo->vt, n->Children[0]->Children[1]);
822 free_node_storage(emitInfo->vt, n->Children[1]);
823 return inst;
824 }
825
826 if (info->NumParams == 2 &&
827 n->Opcode == IR_ADD && n->Children[1]->Opcode == IR_MUL) {
828 /* found pattern IR_ADD(A, IR_MUL(B, C)) */
829 emit(emitInfo, n->Children[0]); /* A */
830 emit(emitInfo, n->Children[1]->Children[0]); /* B */
831 emit(emitInfo, n->Children[1]->Children[1]); /* C */
832 if (!alloc_node_storage(emitInfo, n, -1)) { /* dest */
833 return NULL;
834 }
835
836 inst = emit_instruction(emitInfo,
837 OPCODE_MAD,
838 n->Store,
839 n->Children[1]->Children[0]->Store,
840 n->Children[1]->Children[1]->Store,
841 n->Children[0]->Store);
842
843 free_node_storage(emitInfo->vt, n->Children[1]->Children[0]);
844 free_node_storage(emitInfo->vt, n->Children[1]->Children[1]);
845 free_node_storage(emitInfo->vt, n->Children[0]);
846 return inst;
847 }
848 #endif
849
850 /* gen code for children, may involve temp allocation */
851 for (i = 0; i < info->NumParams; i++) {
852 emit(emitInfo, n->Children[i]);
853 if (!n->Children[i] || !n->Children[i]->Store) {
854 /* error recovery */
855 return NULL;
856 }
857 }
858
859 /* result storage */
860 if (!alloc_node_storage(emitInfo, n, -1)) {
861 return NULL;
862 }
863
864 inst = emit_instruction(emitInfo,
865 info->InstOpcode,
866 n->Store, /* dest */
867 (info->NumParams > 0 ? n->Children[0]->Store : NULL),
868 (info->NumParams > 1 ? n->Children[1]->Store : NULL),
869 (info->NumParams > 2 ? n->Children[2]->Store : NULL)
870 );
871
872 /* free temps */
873 for (i = 0; i < info->NumParams; i++)
874 free_node_storage(emitInfo->vt, n->Children[i]);
875
876 return inst;
877 }
878
879
880 /**
881 * Emit code for == and != operators. These could normally be handled
882 * by emit_arith() except we need to be able to handle structure comparisons.
883 */
884 static struct prog_instruction *
885 emit_compare(slang_emit_info *emitInfo, slang_ir_node *n)
886 {
887 struct prog_instruction *inst = NULL;
888 GLint size;
889
890 assert(n->Opcode == IR_EQUAL || n->Opcode == IR_NOTEQUAL);
891
892 /* gen code for children */
893 emit(emitInfo, n->Children[0]);
894 emit(emitInfo, n->Children[1]);
895
896 if (n->Children[0]->Store->Size != n->Children[1]->Store->Size) {
897 /* XXX this error should have been caught in slang_codegen.c */
898 slang_info_log_error(emitInfo->log, "invalid operands to == or !=");
899 n->Store = NULL;
900 return NULL;
901 }
902
903 /* final result is 1 bool */
904 if (!alloc_node_storage(emitInfo, n, 1))
905 return NULL;
906
907 size = n->Children[0]->Store->Size;
908
909 if (size == 1) {
910 gl_inst_opcode opcode = n->Opcode == IR_EQUAL ? OPCODE_SEQ : OPCODE_SNE;
911 inst = emit_instruction(emitInfo,
912 opcode,
913 n->Store, /* dest */
914 n->Children[0]->Store,
915 n->Children[1]->Store,
916 NULL);
917 }
918 else if (size <= 4) {
919 /* compare two vectors.
920 * Unfortunately, there's no instruction to compare vectors and
921 * return a scalar result. Do it with some compare and dot product
922 * instructions...
923 */
924 GLuint swizzle;
925 gl_inst_opcode dotOp;
926 slang_ir_storage tempStore;
927
928 if (!alloc_local_temp(emitInfo, &tempStore, 4)) {
929 n->Store = NULL;
930 return NULL;
931 /* out of temps */
932 }
933
934 if (size == 4) {
935 dotOp = OPCODE_DP4;
936 swizzle = SWIZZLE_XYZW;
937 }
938 else if (size == 3) {
939 dotOp = OPCODE_DP3;
940 swizzle = SWIZZLE_XYZW;
941 }
942 else {
943 assert(size == 2);
944 dotOp = OPCODE_DP3; /* XXX use OPCODE_DP2 eventually */
945 swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y);
946 }
947
948 /* Compute inequality (temp = (A != B)) */
949 inst = emit_instruction(emitInfo,
950 OPCODE_SNE,
951 &tempStore,
952 n->Children[0]->Store,
953 n->Children[1]->Store,
954 NULL);
955 if (!inst) {
956 return NULL;
957 }
958 inst_comment(inst, "Compare values");
959
960 /* Compute val = DOT(temp, temp) (reduction) */
961 inst = emit_instruction(emitInfo,
962 dotOp,
963 n->Store,
964 &tempStore,
965 &tempStore,
966 NULL);
967 if (!inst) {
968 return NULL;
969 }
970 inst->SrcReg[0].Swizzle = inst->SrcReg[1].Swizzle = swizzle; /*override*/
971 inst_comment(inst, "Reduce vec to bool");
972
973 _slang_free_temp(emitInfo->vt, &tempStore); /* free temp */
974
975 if (n->Opcode == IR_EQUAL) {
976 /* compute val = !val.x with SEQ val, val, 0; */
977 slang_ir_storage zero;
978 constant_to_storage(emitInfo, 0.0, &zero);
979 inst = emit_instruction(emitInfo,
980 OPCODE_SEQ,
981 n->Store, /* dest */
982 n->Store,
983 &zero,
984 NULL);
985 if (!inst) {
986 return NULL;
987 }
988 inst_comment(inst, "Invert true/false");
989 }
990 }
991 else {
992 /* size > 4, struct or array compare.
993 * XXX this won't work reliably for structs with padding!!
994 */
995 GLint i, num = (n->Children[0]->Store->Size + 3) / 4;
996 slang_ir_storage accTemp, sneTemp;
997
998 if (!alloc_local_temp(emitInfo, &accTemp, 4))
999 return NULL;
1000
1001 if (!alloc_local_temp(emitInfo, &sneTemp, 4))
1002 return NULL;
1003
1004 for (i = 0; i < num; i++) {
1005 slang_ir_storage srcStore0 = *n->Children[0]->Store;
1006 slang_ir_storage srcStore1 = *n->Children[1]->Store;
1007 srcStore0.Index += i;
1008 srcStore1.Index += i;
1009
1010 if (i == 0) {
1011 /* SNE accTemp, left[i], right[i] */
1012 inst = emit_instruction(emitInfo, OPCODE_SNE,
1013 &accTemp, /* dest */
1014 &srcStore0,
1015 &srcStore1,
1016 NULL);
1017 if (!inst) {
1018 return NULL;
1019 }
1020 inst_comment(inst, "Begin struct/array comparison");
1021 }
1022 else {
1023 /* SNE sneTemp, left[i], right[i] */
1024 inst = emit_instruction(emitInfo, OPCODE_SNE,
1025 &sneTemp, /* dest */
1026 &srcStore0,
1027 &srcStore1,
1028 NULL);
1029 if (!inst) {
1030 return NULL;
1031 }
1032 /* ADD accTemp, accTemp, sneTemp; # like logical-OR */
1033 inst = emit_instruction(emitInfo, OPCODE_ADD,
1034 &accTemp, /* dest */
1035 &accTemp,
1036 &sneTemp,
1037 NULL);
1038 if (!inst) {
1039 return NULL;
1040 }
1041 }
1042 }
1043
1044 /* compute accTemp.x || accTemp.y || accTemp.z || accTemp.w with DOT4 */
1045 inst = emit_instruction(emitInfo, OPCODE_DP4,
1046 n->Store,
1047 &accTemp,
1048 &accTemp,
1049 NULL);
1050 if (!inst) {
1051 return NULL;
1052 }
1053 inst_comment(inst, "End struct/array comparison");
1054
1055 if (n->Opcode == IR_EQUAL) {
1056 /* compute tmp.x = !tmp.x via tmp.x = (tmp.x == 0) */
1057 slang_ir_storage zero;
1058 constant_to_storage(emitInfo, 0.0, &zero);
1059 inst = emit_instruction(emitInfo, OPCODE_SEQ,
1060 n->Store, /* dest */
1061 n->Store,
1062 &zero,
1063 NULL);
1064 if (!inst) {
1065 return NULL;
1066 }
1067 inst_comment(inst, "Invert true/false");
1068 }
1069
1070 _slang_free_temp(emitInfo->vt, &accTemp);
1071 _slang_free_temp(emitInfo->vt, &sneTemp);
1072 }
1073
1074 /* free temps */
1075 free_node_storage(emitInfo->vt, n->Children[0]);
1076 free_node_storage(emitInfo->vt, n->Children[1]);
1077
1078 return inst;
1079 }
1080
1081
1082
1083 /**
1084 * Generate code for an IR_CLAMP instruction.
1085 */
1086 static struct prog_instruction *
1087 emit_clamp(slang_emit_info *emitInfo, slang_ir_node *n)
1088 {
1089 struct prog_instruction *inst;
1090 slang_ir_node tmpNode;
1091
1092 assert(n->Opcode == IR_CLAMP);
1093 /* ch[0] = value
1094 * ch[1] = min limit
1095 * ch[2] = max limit
1096 */
1097
1098 inst = emit(emitInfo, n->Children[0]);
1099
1100 /* If lower limit == 0.0 and upper limit == 1.0,
1101 * set prev instruction's SaturateMode field to SATURATE_ZERO_ONE.
1102 * Else,
1103 * emit OPCODE_MIN, OPCODE_MAX sequence.
1104 */
1105 #if 0
1106 /* XXX this isn't quite finished yet */
1107 if (n->Children[1]->Opcode == IR_FLOAT &&
1108 n->Children[1]->Value[0] == 0.0 &&
1109 n->Children[1]->Value[1] == 0.0 &&
1110 n->Children[1]->Value[2] == 0.0 &&
1111 n->Children[1]->Value[3] == 0.0 &&
1112 n->Children[2]->Opcode == IR_FLOAT &&
1113 n->Children[2]->Value[0] == 1.0 &&
1114 n->Children[2]->Value[1] == 1.0 &&
1115 n->Children[2]->Value[2] == 1.0 &&
1116 n->Children[2]->Value[3] == 1.0) {
1117 if (!inst) {
1118 inst = prev_instruction(prog);
1119 }
1120 if (inst && inst->Opcode != OPCODE_NOP) {
1121 /* and prev instruction's DstReg matches n->Children[0]->Store */
1122 inst->SaturateMode = SATURATE_ZERO_ONE;
1123 n->Store = n->Children[0]->Store;
1124 return inst;
1125 }
1126 }
1127 #else
1128 (void) inst;
1129 #endif
1130
1131 if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size))
1132 return NULL;
1133
1134 emit(emitInfo, n->Children[1]);
1135 emit(emitInfo, n->Children[2]);
1136
1137 /* Some GPUs don't allow reading from output registers. So if the
1138 * dest for this clamp() is an output reg, we can't use that reg for
1139 * the intermediate result. Use a temp register instead.
1140 */
1141 memset(&tmpNode, 0, sizeof(tmpNode));
1142 if (!alloc_node_storage(emitInfo, &tmpNode, n->Store->Size)) {
1143 return NULL;
1144 }
1145
1146 /* tmp = max(ch[0], ch[1]) */
1147 inst = emit_instruction(emitInfo, OPCODE_MAX,
1148 tmpNode.Store, /* dest */
1149 n->Children[0]->Store,
1150 n->Children[1]->Store,
1151 NULL);
1152 if (!inst) {
1153 return NULL;
1154 }
1155
1156 /* n->dest = min(tmp, ch[2]) */
1157 inst = emit_instruction(emitInfo, OPCODE_MIN,
1158 n->Store, /* dest */
1159 tmpNode.Store,
1160 n->Children[2]->Store,
1161 NULL);
1162
1163 free_node_storage(emitInfo->vt, &tmpNode);
1164
1165 return inst;
1166 }
1167
1168
1169 static struct prog_instruction *
1170 emit_negation(slang_emit_info *emitInfo, slang_ir_node *n)
1171 {
1172 /* Implement as MOV dst, -src; */
1173 /* XXX we could look at the previous instruction and in some circumstances
1174 * modify it to accomplish the negation.
1175 */
1176 struct prog_instruction *inst;
1177
1178 emit(emitInfo, n->Children[0]);
1179
1180 if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size))
1181 return NULL;
1182
1183 inst = emit_instruction(emitInfo,
1184 OPCODE_MOV,
1185 n->Store, /* dest */
1186 n->Children[0]->Store,
1187 NULL,
1188 NULL);
1189 if (inst) {
1190 inst->SrcReg[0].Negate = NEGATE_XYZW;
1191 }
1192 return inst;
1193 }
1194
1195
1196 static struct prog_instruction *
1197 emit_label(slang_emit_info *emitInfo, const slang_ir_node *n)
1198 {
1199 assert(n->Label);
1200 #if 0
1201 /* XXX this fails in loop tail code - investigate someday */
1202 assert(_slang_label_get_location(n->Label) < 0);
1203 _slang_label_set_location(n->Label, emitInfo->prog->NumInstructions,
1204 emitInfo->prog);
1205 #else
1206 if (_slang_label_get_location(n->Label) < 0)
1207 _slang_label_set_location(n->Label, emitInfo->prog->NumInstructions,
1208 emitInfo->prog);
1209 #endif
1210 return NULL;
1211 }
1212
1213
1214 /**
1215 * Emit code for a function call.
1216 * Note that for each time a function is called, we emit the function's
1217 * body code again because the set of available registers may be different.
1218 */
1219 static struct prog_instruction *
1220 emit_fcall(slang_emit_info *emitInfo, slang_ir_node *n)
1221 {
1222 struct gl_program *progSave;
1223 struct prog_instruction *inst;
1224 GLuint subroutineId;
1225 GLuint maxInstSave;
1226
1227 assert(n->Opcode == IR_CALL);
1228 assert(n->Label);
1229
1230 /* save/push cur program */
1231 maxInstSave = emitInfo->MaxInstructions;
1232 progSave = emitInfo->prog;
1233
1234 emitInfo->prog = new_subroutine(emitInfo, &subroutineId);
1235 emitInfo->MaxInstructions = emitInfo->prog->NumInstructions;
1236
1237 _slang_label_set_location(n->Label, emitInfo->prog->NumInstructions,
1238 emitInfo->prog);
1239
1240 if (emitInfo->EmitBeginEndSub) {
1241 /* BGNSUB isn't a real instruction.
1242 * We require a label (i.e. "foobar:") though, if we're going to
1243 * print the program in the NV format. The BNGSUB instruction is
1244 * really just a NOP to attach the label to.
1245 */
1246 inst = new_instruction(emitInfo, OPCODE_BGNSUB);
1247 if (!inst) {
1248 return NULL;
1249 }
1250 inst_comment(inst, n->Label->Name);
1251 }
1252
1253 /* body of function: */
1254 emit(emitInfo, n->Children[0]);
1255 n->Store = n->Children[0]->Store;
1256
1257 /* add RET instruction now, if needed */
1258 inst = prev_instruction(emitInfo);
1259 if (inst && inst->Opcode != OPCODE_RET) {
1260 inst = new_instruction(emitInfo, OPCODE_RET);
1261 if (!inst) {
1262 return NULL;
1263 }
1264 }
1265
1266 if (emitInfo->EmitBeginEndSub) {
1267 inst = new_instruction(emitInfo, OPCODE_ENDSUB);
1268 if (!inst) {
1269 return NULL;
1270 }
1271 inst_comment(inst, n->Label->Name);
1272 }
1273
1274 /* pop/restore cur program */
1275 emitInfo->prog = progSave;
1276 emitInfo->MaxInstructions = maxInstSave;
1277
1278 /* emit the function call */
1279 inst = new_instruction(emitInfo, OPCODE_CAL);
1280 if (!inst) {
1281 return NULL;
1282 }
1283 /* The branch target is just the subroutine number (changed later) */
1284 inst->BranchTarget = subroutineId;
1285 inst_comment(inst, n->Label->Name);
1286 assert(inst->BranchTarget >= 0);
1287
1288 return inst;
1289 }
1290
1291
1292 /**
1293 * Emit code for a 'return' statement.
1294 */
1295 static struct prog_instruction *
1296 emit_return(slang_emit_info *emitInfo, slang_ir_node *n)
1297 {
1298 struct prog_instruction *inst;
1299 assert(n);
1300 assert(n->Opcode == IR_RETURN);
1301 assert(n->Label);
1302 inst = new_instruction(emitInfo, OPCODE_RET);
1303 if (inst) {
1304 inst->DstReg.CondMask = COND_TR; /* always return */
1305 }
1306 return inst;
1307 }
1308
1309
1310 static struct prog_instruction *
1311 emit_kill(slang_emit_info *emitInfo)
1312 {
1313 struct gl_fragment_program *fp;
1314 struct prog_instruction *inst;
1315 /* NV-KILL - discard fragment depending on condition code.
1316 * Note that ARB-KILL depends on sign of vector operand.
1317 */
1318 inst = new_instruction(emitInfo, OPCODE_KIL_NV);
1319 if (!inst) {
1320 return NULL;
1321 }
1322 inst->DstReg.CondMask = COND_TR; /* always kill */
1323
1324 assert(emitInfo->prog->Target == GL_FRAGMENT_PROGRAM_ARB);
1325 fp = (struct gl_fragment_program *) emitInfo->prog;
1326 fp->UsesKill = GL_TRUE;
1327
1328 return inst;
1329 }
1330
1331
1332 static struct prog_instruction *
1333 emit_tex(slang_emit_info *emitInfo, slang_ir_node *n)
1334 {
1335 struct prog_instruction *inst;
1336 gl_inst_opcode opcode;
1337 GLboolean shadow = GL_FALSE;
1338
1339 switch (n->Opcode) {
1340 case IR_TEX:
1341 opcode = OPCODE_TEX;
1342 break;
1343 case IR_TEX_SH:
1344 opcode = OPCODE_TEX;
1345 shadow = GL_TRUE;
1346 break;
1347 case IR_TEXB:
1348 opcode = OPCODE_TXB;
1349 break;
1350 case IR_TEXB_SH:
1351 opcode = OPCODE_TXB;
1352 shadow = GL_TRUE;
1353 break;
1354 case IR_TEXP:
1355 opcode = OPCODE_TXP;
1356 break;
1357 case IR_TEXP_SH:
1358 opcode = OPCODE_TXP;
1359 shadow = GL_TRUE;
1360 break;
1361 default:
1362 _mesa_problem(NULL, "Bad IR TEX code");
1363 return NULL;
1364 }
1365
1366 if (n->Children[0]->Opcode == IR_ELEMENT) {
1367 /* array is the sampler (a uniform which'll indicate the texture unit) */
1368 assert(n->Children[0]->Children[0]->Store);
1369 assert(n->Children[0]->Children[0]->Store->File == PROGRAM_SAMPLER);
1370
1371 emit(emitInfo, n->Children[0]);
1372
1373 n->Children[0]->Var = n->Children[0]->Children[0]->Var;
1374 } else {
1375 /* this is the sampler (a uniform which'll indicate the texture unit) */
1376 assert(n->Children[0]->Store);
1377 assert(n->Children[0]->Store->File == PROGRAM_SAMPLER);
1378 }
1379
1380 /* emit code for the texcoord operand */
1381 (void) emit(emitInfo, n->Children[1]);
1382
1383 /* alloc storage for result of texture fetch */
1384 if (!alloc_node_storage(emitInfo, n, 4))
1385 return NULL;
1386
1387 /* emit TEX instruction; Child[1] is the texcoord */
1388 inst = emit_instruction(emitInfo,
1389 opcode,
1390 n->Store,
1391 n->Children[1]->Store,
1392 NULL,
1393 NULL);
1394 if (!inst) {
1395 return NULL;
1396 }
1397
1398 inst->TexShadow = shadow;
1399
1400 /* Store->Index is the uniform/sampler index */
1401 assert(n->Children[0]->Store->Index >= 0);
1402 inst->TexSrcUnit = n->Children[0]->Store->Index;
1403 inst->TexSrcTarget = n->Children[0]->Store->TexTarget;
1404
1405 /* mark the sampler as being used */
1406 _mesa_use_uniform(emitInfo->prog->Parameters,
1407 (char *) n->Children[0]->Var->a_name);
1408
1409 return inst;
1410 }
1411
1412
1413 /**
1414 * Assignment/copy
1415 */
1416 static struct prog_instruction *
1417 emit_copy(slang_emit_info *emitInfo, slang_ir_node *n)
1418 {
1419 struct prog_instruction *inst;
1420
1421 assert(n->Opcode == IR_COPY);
1422
1423 /* lhs */
1424 emit(emitInfo, n->Children[0]);
1425 if (!n->Children[0]->Store || n->Children[0]->Store->Index < 0) {
1426 /* an error should have been already recorded */
1427 return NULL;
1428 }
1429
1430 /* rhs */
1431 assert(n->Children[1]);
1432 inst = emit(emitInfo, n->Children[1]);
1433
1434 if (!n->Children[1]->Store || n->Children[1]->Store->Index < 0) {
1435 if (!emitInfo->log->text && !emitInfo->UnresolvedFunctions) {
1436 /* XXX this error should have been caught in slang_codegen.c */
1437 slang_info_log_error(emitInfo->log, "invalid assignment");
1438 }
1439 return NULL;
1440 }
1441
1442 assert(n->Children[1]->Store->Index >= 0);
1443
1444 /*assert(n->Children[0]->Store->Size == n->Children[1]->Store->Size);*/
1445
1446 n->Store = n->Children[0]->Store;
1447
1448 if (n->Store->File == PROGRAM_SAMPLER) {
1449 /* no code generated for sampler assignments,
1450 * just copy the sampler index/target at compile time.
1451 */
1452 n->Store->Index = n->Children[1]->Store->Index;
1453 n->Store->TexTarget = n->Children[1]->Store->TexTarget;
1454 return NULL;
1455 }
1456
1457 #if PEEPHOLE_OPTIMIZATIONS
1458 if (inst &&
1459 (n->Children[1]->Opcode != IR_SWIZZLE) &&
1460 _slang_is_temp(emitInfo->vt, n->Children[1]->Store) &&
1461 (inst->DstReg.File == n->Children[1]->Store->File) &&
1462 (inst->DstReg.Index == n->Children[1]->Store->Index) &&
1463 !n->Children[0]->Store->IsIndirect &&
1464 n->Children[0]->Store->Size <= 4) {
1465 /* Peephole optimization:
1466 * The Right-Hand-Side has its results in a temporary place.
1467 * Modify the RHS (and the prev instruction) to store its results
1468 * in the destination specified by n->Children[0].
1469 * Then, this MOVE is a no-op.
1470 * Ex:
1471 * MUL tmp, x, y;
1472 * MOV a, tmp;
1473 * becomes:
1474 * MUL a, x, y;
1475 */
1476
1477 /* fixup the previous instruction (which stored the RHS result) */
1478 assert(n->Children[0]->Store->Index >= 0);
1479 storage_to_dst_reg(&inst->DstReg, n->Children[0]->Store);
1480 return inst;
1481 }
1482 else
1483 #endif
1484 {
1485 if (n->Children[0]->Store->Size > 4) {
1486 /* move matrix/struct etc (block of registers) */
1487 slang_ir_storage dstStore = *n->Children[0]->Store;
1488 slang_ir_storage srcStore = *n->Children[1]->Store;
1489 GLint size = srcStore.Size;
1490 ASSERT(n->Children[1]->Store->Swizzle == SWIZZLE_NOOP);
1491 dstStore.Size = 4;
1492 srcStore.Size = 4;
1493 while (size >= 4) {
1494 inst = emit_instruction(emitInfo, OPCODE_MOV,
1495 &dstStore,
1496 &srcStore,
1497 NULL,
1498 NULL);
1499 if (!inst) {
1500 return NULL;
1501 }
1502 inst_comment(inst, "IR_COPY block");
1503 srcStore.Index++;
1504 dstStore.Index++;
1505 size -= 4;
1506 }
1507 }
1508 else {
1509 /* single register move */
1510 char *srcAnnot, *dstAnnot;
1511 assert(n->Children[0]->Store->Index >= 0);
1512 inst = emit_instruction(emitInfo, OPCODE_MOV,
1513 n->Children[0]->Store, /* dest */
1514 n->Children[1]->Store,
1515 NULL,
1516 NULL);
1517 if (!inst) {
1518 return NULL;
1519 }
1520 dstAnnot = storage_annotation(n->Children[0], emitInfo->prog);
1521 srcAnnot = storage_annotation(n->Children[1], emitInfo->prog);
1522 inst->Comment = instruction_annotation(inst->Opcode, dstAnnot,
1523 srcAnnot, NULL, NULL);
1524 }
1525 free_node_storage(emitInfo->vt, n->Children[1]);
1526 return inst;
1527 }
1528 }
1529
1530
1531 /**
1532 * An IR_COND node wraps a boolean expression which is used by an
1533 * IF or WHILE test. This is where we'll set condition codes, if needed.
1534 */
1535 static struct prog_instruction *
1536 emit_cond(slang_emit_info *emitInfo, slang_ir_node *n)
1537 {
1538 struct prog_instruction *inst;
1539
1540 assert(n->Opcode == IR_COND);
1541
1542 if (!n->Children[0])
1543 return NULL;
1544
1545 /* emit code for the expression */
1546 inst = emit(emitInfo, n->Children[0]);
1547
1548 if (!n->Children[0]->Store) {
1549 /* error recovery */
1550 return NULL;
1551 }
1552
1553 assert(n->Children[0]->Store);
1554 /*assert(n->Children[0]->Store->Size == 1);*/
1555
1556 if (emitInfo->EmitCondCodes) {
1557 if (inst &&
1558 n->Children[0]->Store &&
1559 inst->DstReg.File == n->Children[0]->Store->File &&
1560 inst->DstReg.Index == n->Children[0]->Store->Index) {
1561 /* The previous instruction wrote to the register who's value
1562 * we're testing. Just fix that instruction so that the
1563 * condition codes are computed.
1564 */
1565 inst->CondUpdate = GL_TRUE;
1566 n->Store = n->Children[0]->Store;
1567 return inst;
1568 }
1569 else {
1570 /* This'll happen for things like "if (i) ..." where no code
1571 * is normally generated for the expression "i".
1572 * Generate a move instruction just to set condition codes.
1573 */
1574 if (!alloc_node_storage(emitInfo, n, 1))
1575 return NULL;
1576 inst = emit_instruction(emitInfo, OPCODE_MOV,
1577 n->Store, /* dest */
1578 n->Children[0]->Store,
1579 NULL,
1580 NULL);
1581 if (!inst) {
1582 return NULL;
1583 }
1584 inst->CondUpdate = GL_TRUE;
1585 inst_comment(inst, "COND expr");
1586 _slang_free_temp(emitInfo->vt, n->Store);
1587 return inst;
1588 }
1589 }
1590 else {
1591 /* No-op: the boolean result of the expression is in a regular reg */
1592 n->Store = n->Children[0]->Store;
1593 return inst;
1594 }
1595 }
1596
1597
1598 /**
1599 * Logical-NOT
1600 */
1601 static struct prog_instruction *
1602 emit_not(slang_emit_info *emitInfo, slang_ir_node *n)
1603 {
1604 static const struct {
1605 gl_inst_opcode op, opNot;
1606 } operators[] = {
1607 { OPCODE_SLT, OPCODE_SGE },
1608 { OPCODE_SLE, OPCODE_SGT },
1609 { OPCODE_SGT, OPCODE_SLE },
1610 { OPCODE_SGE, OPCODE_SLT },
1611 { OPCODE_SEQ, OPCODE_SNE },
1612 { OPCODE_SNE, OPCODE_SEQ },
1613 { 0, 0 }
1614 };
1615 struct prog_instruction *inst;
1616 slang_ir_storage zero;
1617 GLuint i;
1618
1619 /* child expr */
1620 inst = emit(emitInfo, n->Children[0]);
1621
1622 #if PEEPHOLE_OPTIMIZATIONS
1623 if (inst) {
1624 /* if the prev instruction was a comparison instruction, invert it */
1625 for (i = 0; operators[i].op; i++) {
1626 if (inst->Opcode == operators[i].op) {
1627 inst->Opcode = operators[i].opNot;
1628 n->Store = n->Children[0]->Store;
1629 return inst;
1630 }
1631 }
1632 }
1633 #endif
1634
1635 /* else, invert using SEQ (v = v == 0) */
1636 if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size))
1637 return NULL;
1638
1639 constant_to_storage(emitInfo, 0.0, &zero);
1640 inst = emit_instruction(emitInfo,
1641 OPCODE_SEQ,
1642 n->Store,
1643 n->Children[0]->Store,
1644 &zero,
1645 NULL);
1646 if (!inst) {
1647 return NULL;
1648 }
1649 inst_comment(inst, "NOT");
1650
1651 free_node_storage(emitInfo->vt, n->Children[0]);
1652
1653 return inst;
1654 }
1655
1656
1657 static struct prog_instruction *
1658 emit_if(slang_emit_info *emitInfo, slang_ir_node *n)
1659 {
1660 struct gl_program *prog = emitInfo->prog;
1661 GLuint ifInstLoc, elseInstLoc = 0;
1662 GLuint condWritemask = 0;
1663
1664 /* emit condition expression code */
1665 {
1666 struct prog_instruction *inst;
1667 inst = emit(emitInfo, n->Children[0]);
1668 if (emitInfo->EmitCondCodes) {
1669 if (!inst) {
1670 /* error recovery */
1671 return NULL;
1672 }
1673 condWritemask = inst->DstReg.WriteMask;
1674 }
1675 }
1676
1677 if (!n->Children[0]->Store)
1678 return NULL;
1679
1680 #if 0
1681 assert(n->Children[0]->Store->Size == 1); /* a bool! */
1682 #endif
1683
1684 ifInstLoc = prog->NumInstructions;
1685 if (emitInfo->EmitHighLevelInstructions) {
1686 if (emitInfo->EmitCondCodes) {
1687 /* IF condcode THEN ... */
1688 struct prog_instruction *ifInst = new_instruction(emitInfo, OPCODE_IF);
1689 if (!ifInst) {
1690 return NULL;
1691 }
1692 ifInst->DstReg.CondMask = COND_NE; /* if cond is non-zero */
1693 /* only test the cond code (1 of 4) that was updated by the
1694 * previous instruction.
1695 */
1696 ifInst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1697 }
1698 else {
1699 struct prog_instruction *inst;
1700
1701 /* IF src[0] THEN ... */
1702 inst = emit_instruction(emitInfo, OPCODE_IF,
1703 NULL, /* dst */
1704 n->Children[0]->Store, /* op0 */
1705 NULL,
1706 NULL);
1707 if (!inst) {
1708 return NULL;
1709 }
1710 }
1711 }
1712 else {
1713 /* conditional jump to else, or endif */
1714 struct prog_instruction *ifInst = new_instruction(emitInfo, OPCODE_BRA);
1715 if (!ifInst) {
1716 return NULL;
1717 }
1718 ifInst->DstReg.CondMask = COND_EQ; /* BRA if cond is zero */
1719 inst_comment(ifInst, "if zero");
1720 ifInst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1721 }
1722
1723 /* if body */
1724 emit(emitInfo, n->Children[1]);
1725
1726 if (n->Children[2]) {
1727 /* have else body */
1728 elseInstLoc = prog->NumInstructions;
1729 if (emitInfo->EmitHighLevelInstructions) {
1730 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_ELSE);
1731 if (!inst) {
1732 return NULL;
1733 }
1734 prog->Instructions[ifInstLoc].BranchTarget = prog->NumInstructions - 1;
1735 }
1736 else {
1737 /* jump to endif instruction */
1738 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_BRA);
1739 if (!inst) {
1740 return NULL;
1741 }
1742 inst_comment(inst, "else");
1743 inst->DstReg.CondMask = COND_TR; /* always branch */
1744 prog->Instructions[ifInstLoc].BranchTarget = prog->NumInstructions;
1745 }
1746 emit(emitInfo, n->Children[2]);
1747 }
1748 else {
1749 /* no else body */
1750 prog->Instructions[ifInstLoc].BranchTarget = prog->NumInstructions;
1751 }
1752
1753 if (emitInfo->EmitHighLevelInstructions) {
1754 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_ENDIF);
1755 if (!inst) {
1756 return NULL;
1757 }
1758 }
1759
1760 if (elseInstLoc) {
1761 /* point ELSE instruction BranchTarget at ENDIF */
1762 if (emitInfo->EmitHighLevelInstructions) {
1763 prog->Instructions[elseInstLoc].BranchTarget = prog->NumInstructions - 1;
1764 }
1765 else {
1766 prog->Instructions[elseInstLoc].BranchTarget = prog->NumInstructions;
1767 }
1768 }
1769 return NULL;
1770 }
1771
1772
1773 static struct prog_instruction *
1774 emit_loop(slang_emit_info *emitInfo, slang_ir_node *n)
1775 {
1776 struct gl_program *prog = emitInfo->prog;
1777 struct prog_instruction *endInst;
1778 GLuint beginInstLoc, tailInstLoc, endInstLoc;
1779 slang_ir_node *ir;
1780
1781 /* emit OPCODE_BGNLOOP */
1782 beginInstLoc = prog->NumInstructions;
1783 if (emitInfo->EmitHighLevelInstructions) {
1784 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_BGNLOOP);
1785 if (!inst) {
1786 return NULL;
1787 }
1788 }
1789
1790 /* body */
1791 emit(emitInfo, n->Children[0]);
1792
1793 /* tail */
1794 tailInstLoc = prog->NumInstructions;
1795 if (n->Children[1]) {
1796 if (emitInfo->EmitComments)
1797 emit_comment(emitInfo, "Loop tail code:");
1798 emit(emitInfo, n->Children[1]);
1799 }
1800
1801 endInstLoc = prog->NumInstructions;
1802 if (emitInfo->EmitHighLevelInstructions) {
1803 /* emit OPCODE_ENDLOOP */
1804 endInst = new_instruction(emitInfo, OPCODE_ENDLOOP);
1805 if (!endInst) {
1806 return NULL;
1807 }
1808 }
1809 else {
1810 /* emit unconditional BRA-nch */
1811 endInst = new_instruction(emitInfo, OPCODE_BRA);
1812 if (!endInst) {
1813 return NULL;
1814 }
1815 endInst->DstReg.CondMask = COND_TR; /* always true */
1816 }
1817 /* ENDLOOP's BranchTarget points to the BGNLOOP inst */
1818 endInst->BranchTarget = beginInstLoc;
1819
1820 if (emitInfo->EmitHighLevelInstructions) {
1821 /* BGNLOOP's BranchTarget points to the ENDLOOP inst */
1822 prog->Instructions[beginInstLoc].BranchTarget = prog->NumInstructions -1;
1823 }
1824
1825 /* Done emitting loop code. Now walk over the loop's linked list of
1826 * BREAK and CONT nodes, filling in their BranchTarget fields (which
1827 * will point to the corresponding ENDLOOP instruction.
1828 */
1829 for (ir = n->List; ir; ir = ir->List) {
1830 struct prog_instruction *inst = prog->Instructions + ir->InstLocation;
1831 assert(inst->BranchTarget < 0);
1832 if (ir->Opcode == IR_BREAK ||
1833 ir->Opcode == IR_BREAK_IF_TRUE) {
1834 assert(inst->Opcode == OPCODE_BRK ||
1835 inst->Opcode == OPCODE_BRA);
1836 /* go to instruction at end of loop */
1837 if (emitInfo->EmitHighLevelInstructions) {
1838 inst->BranchTarget = endInstLoc;
1839 }
1840 else {
1841 inst->BranchTarget = endInstLoc + 1;
1842 }
1843 }
1844 else {
1845 assert(ir->Opcode == IR_CONT ||
1846 ir->Opcode == IR_CONT_IF_TRUE);
1847 assert(inst->Opcode == OPCODE_CONT ||
1848 inst->Opcode == OPCODE_BRA);
1849 /* go to instruction at tail of loop */
1850 inst->BranchTarget = endInstLoc;
1851 }
1852 }
1853 return NULL;
1854 }
1855
1856
1857 /**
1858 * Unconditional "continue" or "break" statement.
1859 * Either OPCODE_CONT, OPCODE_BRK or OPCODE_BRA will be emitted.
1860 */
1861 static struct prog_instruction *
1862 emit_cont_break(slang_emit_info *emitInfo, slang_ir_node *n)
1863 {
1864 gl_inst_opcode opcode;
1865 struct prog_instruction *inst;
1866
1867 if (n->Opcode == IR_CONT) {
1868 /* we need to execute the loop's tail code before doing CONT */
1869 assert(n->Parent);
1870 assert(n->Parent->Opcode == IR_LOOP);
1871 if (n->Parent->Children[1]) {
1872 /* emit tail code */
1873 if (emitInfo->EmitComments) {
1874 emit_comment(emitInfo, "continue - tail code:");
1875 }
1876 emit(emitInfo, n->Parent->Children[1]);
1877 }
1878 }
1879
1880 /* opcode selection */
1881 if (emitInfo->EmitHighLevelInstructions) {
1882 opcode = (n->Opcode == IR_CONT) ? OPCODE_CONT : OPCODE_BRK;
1883 }
1884 else {
1885 opcode = OPCODE_BRA;
1886 }
1887 n->InstLocation = emitInfo->prog->NumInstructions;
1888 inst = new_instruction(emitInfo, opcode);
1889 if (inst) {
1890 inst->DstReg.CondMask = COND_TR; /* always true */
1891 }
1892 return inst;
1893 }
1894
1895
1896 /**
1897 * Conditional "continue" or "break" statement.
1898 * Either OPCODE_CONT, OPCODE_BRK or OPCODE_BRA will be emitted.
1899 */
1900 static struct prog_instruction *
1901 emit_cont_break_if_true(slang_emit_info *emitInfo, slang_ir_node *n)
1902 {
1903 struct prog_instruction *inst;
1904
1905 assert(n->Opcode == IR_CONT_IF_TRUE ||
1906 n->Opcode == IR_BREAK_IF_TRUE);
1907
1908 /* evaluate condition expr, setting cond codes */
1909 inst = emit(emitInfo, n->Children[0]);
1910 if (emitInfo->EmitCondCodes) {
1911 assert(inst);
1912 inst->CondUpdate = GL_TRUE;
1913 }
1914
1915 n->InstLocation = emitInfo->prog->NumInstructions;
1916
1917 /* opcode selection */
1918 if (emitInfo->EmitHighLevelInstructions) {
1919 const gl_inst_opcode opcode
1920 = (n->Opcode == IR_CONT_IF_TRUE) ? OPCODE_CONT : OPCODE_BRK;
1921 if (emitInfo->EmitCondCodes) {
1922 /* Get the writemask from the previous instruction which set
1923 * the condcodes. Use that writemask as the CondSwizzle.
1924 */
1925 const GLuint condWritemask = inst->DstReg.WriteMask;
1926 inst = new_instruction(emitInfo, opcode);
1927 if (inst) {
1928 inst->DstReg.CondMask = COND_NE;
1929 inst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1930 }
1931 return inst;
1932 }
1933 else {
1934 /* IF reg
1935 * BRK/CONT;
1936 * ENDIF
1937 */
1938 GLint ifInstLoc;
1939 ifInstLoc = emitInfo->prog->NumInstructions;
1940 inst = emit_instruction(emitInfo, OPCODE_IF,
1941 NULL, /* dest */
1942 n->Children[0]->Store,
1943 NULL,
1944 NULL);
1945 if (!inst) {
1946 return NULL;
1947 }
1948 n->InstLocation = emitInfo->prog->NumInstructions;
1949
1950 inst = new_instruction(emitInfo, opcode);
1951 if (!inst) {
1952 return NULL;
1953 }
1954 inst = new_instruction(emitInfo, OPCODE_ENDIF);
1955 if (!inst) {
1956 return NULL;
1957 }
1958
1959 emitInfo->prog->Instructions[ifInstLoc].BranchTarget
1960 = emitInfo->prog->NumInstructions - 1;
1961 return inst;
1962 }
1963 }
1964 else {
1965 const GLuint condWritemask = inst->DstReg.WriteMask;
1966 assert(emitInfo->EmitCondCodes);
1967 inst = new_instruction(emitInfo, OPCODE_BRA);
1968 if (inst) {
1969 inst->DstReg.CondMask = COND_NE;
1970 inst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1971 }
1972 return inst;
1973 }
1974 }
1975
1976
1977 /**
1978 * Return the size of a swizzle mask given that some swizzle components
1979 * may be NIL/undefined. For example:
1980 * swizzle_size(".zzxx") = 4
1981 * swizzle_size(".xy??") = 2
1982 * swizzle_size(".w???") = 1
1983 */
1984 static GLuint
1985 swizzle_size(GLuint swizzle)
1986 {
1987 GLuint i;
1988 for (i = 0; i < 4; i++) {
1989 if (GET_SWZ(swizzle, i) == SWIZZLE_NIL)
1990 return i;
1991 }
1992 return 4;
1993 }
1994
1995
1996 static struct prog_instruction *
1997 emit_swizzle(slang_emit_info *emitInfo, slang_ir_node *n)
1998 {
1999 struct prog_instruction *inst;
2000
2001 inst = emit(emitInfo, n->Children[0]);
2002
2003 if (!n->Store->Parent) {
2004 /* this covers a case such as "(b ? p : q).x" */
2005 n->Store->Parent = n->Children[0]->Store;
2006 assert(n->Store->Parent);
2007 }
2008
2009 {
2010 const GLuint swizzle = n->Store->Swizzle;
2011 /* new storage is parent storage with updated Swizzle + Size fields */
2012 _slang_copy_ir_storage(n->Store, n->Store->Parent);
2013 /* Apply this node's swizzle to parent's storage */
2014 n->Store->Swizzle = _slang_swizzle_swizzle(n->Store->Swizzle, swizzle);
2015 /* Update size */
2016 n->Store->Size = swizzle_size(n->Store->Swizzle);
2017 }
2018
2019 assert(!n->Store->Parent);
2020 assert(n->Store->Index >= 0);
2021
2022 return inst;
2023 }
2024
2025
2026 /**
2027 * Dereference array element: element == array[index]
2028 * This basically involves emitting code for computing the array index
2029 * and updating the node/element's storage info.
2030 */
2031 static struct prog_instruction *
2032 emit_array_element(slang_emit_info *emitInfo, slang_ir_node *n)
2033 {
2034 slang_ir_storage *arrayStore, *indexStore;
2035 const int elemSize = n->Store->Size; /* number of floats */
2036 const GLint elemSizeVec = (elemSize + 3) / 4; /* number of vec4 */
2037 struct prog_instruction *inst;
2038
2039 assert(n->Opcode == IR_ELEMENT);
2040 assert(elemSize > 0);
2041
2042 /* special case for built-in state variables, like light state */
2043 {
2044 slang_ir_storage *root = n->Store;
2045 assert(!root->Parent);
2046 while (root->Parent)
2047 root = root->Parent;
2048
2049 if (root->File == PROGRAM_STATE_VAR) {
2050 GLboolean direct;
2051 GLint index =
2052 _slang_alloc_statevar(n, emitInfo->prog->Parameters, &direct);
2053 if (index < 0) {
2054 /* error */
2055 return NULL;
2056 }
2057 if (direct) {
2058 n->Store->Index = index;
2059 return NULL; /* all done */
2060 }
2061 }
2062 }
2063
2064 /* do codegen for array itself */
2065 emit(emitInfo, n->Children[0]);
2066 arrayStore = n->Children[0]->Store;
2067
2068 /* The initial array element storage is the array's storage,
2069 * then modified below.
2070 */
2071 _slang_copy_ir_storage(n->Store, arrayStore);
2072
2073
2074 if (n->Children[1]->Opcode == IR_FLOAT) {
2075 /* Constant array index */
2076 const GLint element = (GLint) n->Children[1]->Value[0];
2077
2078 /* this element's storage is the array's storage, plus constant offset */
2079 n->Store->Index += elemSizeVec * element;
2080 }
2081 else {
2082 /* Variable array index */
2083
2084 /* do codegen for array index expression */
2085 emit(emitInfo, n->Children[1]);
2086 indexStore = n->Children[1]->Store;
2087
2088 if (indexStore->IsIndirect) {
2089 /* need to put the array index into a temporary since we can't
2090 * directly support a[b[i]] constructs.
2091 */
2092
2093
2094 /*indexStore = tempstore();*/
2095 }
2096
2097
2098 if (elemSize > 4) {
2099 /* need to multiply array index by array element size */
2100 struct prog_instruction *inst;
2101 slang_ir_storage *indexTemp;
2102 slang_ir_storage elemSizeStore;
2103
2104 /* allocate 1 float indexTemp */
2105 indexTemp = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, 1);
2106 _slang_alloc_temp(emitInfo->vt, indexTemp);
2107
2108 /* allocate a constant containing the element size */
2109 constant_to_storage(emitInfo, (float) elemSizeVec, &elemSizeStore);
2110
2111 /* multiply array index by element size */
2112 inst = emit_instruction(emitInfo,
2113 OPCODE_MUL,
2114 indexTemp, /* dest */
2115 indexStore, /* the index */
2116 &elemSizeStore,
2117 NULL);
2118 if (!inst) {
2119 return NULL;
2120 }
2121
2122 indexStore = indexTemp;
2123 }
2124
2125 if (arrayStore->IsIndirect) {
2126 /* ex: in a[i][j], a[i] (the arrayStore) is indirect */
2127 /* Need to add indexStore to arrayStore->Indirect store */
2128 slang_ir_storage indirectArray;
2129 slang_ir_storage *indexTemp;
2130
2131 _slang_init_ir_storage(&indirectArray,
2132 arrayStore->IndirectFile,
2133 arrayStore->IndirectIndex,
2134 1,
2135 arrayStore->IndirectSwizzle);
2136
2137 /* allocate 1 float indexTemp */
2138 indexTemp = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, 1);
2139 _slang_alloc_temp(emitInfo->vt, indexTemp);
2140
2141 inst = emit_instruction(emitInfo,
2142 OPCODE_ADD,
2143 indexTemp, /* dest */
2144 indexStore, /* the index */
2145 &indirectArray, /* indirect array base */
2146 NULL);
2147 if (!inst) {
2148 return NULL;
2149 }
2150
2151 indexStore = indexTemp;
2152 }
2153
2154 /* update the array element storage info */
2155 n->Store->IsIndirect = GL_TRUE;
2156 n->Store->IndirectFile = indexStore->File;
2157 n->Store->IndirectIndex = indexStore->Index;
2158 n->Store->IndirectSwizzle = indexStore->Swizzle;
2159 }
2160
2161 n->Store->Size = elemSize;
2162 n->Store->Swizzle = _slang_var_swizzle(elemSize, 0);
2163
2164 return NULL; /* no instruction */
2165 }
2166
2167
2168 /**
2169 * Resolve storage for accessing a structure field.
2170 */
2171 static struct prog_instruction *
2172 emit_struct_field(slang_emit_info *emitInfo, slang_ir_node *n)
2173 {
2174 slang_ir_storage *root = n->Store;
2175 GLint fieldOffset, fieldSize;
2176
2177 assert(n->Opcode == IR_FIELD);
2178
2179 assert(!root->Parent);
2180 while (root->Parent)
2181 root = root->Parent;
2182
2183 /* If this is the field of a state var, allocate constant/uniform
2184 * storage for it now if we haven't already.
2185 * Note that we allocate storage (uniform/constant slots) for state
2186 * variables here rather than at declaration time so we only allocate
2187 * space for the ones that we actually use!
2188 */
2189 if (root->File == PROGRAM_STATE_VAR) {
2190 GLboolean direct;
2191 GLint index = _slang_alloc_statevar(n, emitInfo->prog->Parameters, &direct);
2192 if (index < 0) {
2193 slang_info_log_error(emitInfo->log, "Error parsing state variable");
2194 return NULL;
2195 }
2196 if (direct) {
2197 root->Index = index;
2198 return NULL; /* all done */
2199 }
2200 }
2201
2202 /* do codegen for struct */
2203 emit(emitInfo, n->Children[0]);
2204 assert(n->Children[0]->Store->Index >= 0);
2205
2206
2207 fieldOffset = n->Store->Index;
2208 fieldSize = n->Store->Size;
2209
2210 _slang_copy_ir_storage(n->Store, n->Children[0]->Store);
2211
2212 n->Store->Index = n->Children[0]->Store->Index + fieldOffset / 4;
2213 n->Store->Size = fieldSize;
2214
2215 switch (fieldSize) {
2216 case 1:
2217 {
2218 GLint swz = fieldOffset % 4;
2219 n->Store->Swizzle = MAKE_SWIZZLE4(swz, swz, swz, swz);
2220 }
2221 break;
2222 case 2:
2223 n->Store->Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
2224 SWIZZLE_NIL, SWIZZLE_NIL);
2225 break;
2226 case 3:
2227 n->Store->Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
2228 SWIZZLE_Z, SWIZZLE_NIL);
2229 break;
2230 default:
2231 n->Store->Swizzle = SWIZZLE_XYZW;
2232 }
2233
2234 assert(n->Store->Index >= 0);
2235
2236 return NULL; /* no instruction */
2237 }
2238
2239
2240 /**
2241 * Emit code for a variable declaration.
2242 * This usually doesn't result in any code generation, but just
2243 * memory allocation.
2244 */
2245 static struct prog_instruction *
2246 emit_var_decl(slang_emit_info *emitInfo, slang_ir_node *n)
2247 {
2248 assert(n->Store);
2249 assert(n->Store->File != PROGRAM_UNDEFINED);
2250 assert(n->Store->Size > 0);
2251 /*assert(n->Store->Index < 0);*/
2252
2253 if (!n->Var || n->Var->isTemp) {
2254 /* a nameless/temporary variable, will be freed after first use */
2255 /*NEW*/
2256 if (n->Store->Index < 0 && !_slang_alloc_temp(emitInfo->vt, n->Store)) {
2257 slang_info_log_error(emitInfo->log,
2258 "Ran out of registers, too many temporaries");
2259 return NULL;
2260 }
2261 }
2262 else {
2263 /* a regular variable */
2264 _slang_add_variable(emitInfo->vt, n->Var);
2265 if (!_slang_alloc_var(emitInfo->vt, n->Store)) {
2266 slang_info_log_error(emitInfo->log,
2267 "Ran out of registers, too many variables");
2268 return NULL;
2269 }
2270 /*
2271 printf("IR_VAR_DECL %s %d store %p\n",
2272 (char*) n->Var->a_name, n->Store->Index, (void*) n->Store);
2273 */
2274 assert(n->Var->store == n->Store);
2275 }
2276 if (emitInfo->EmitComments) {
2277 /* emit NOP with comment describing the variable's storage location */
2278 char s[1000];
2279 _mesa_snprintf(s, sizeof(s), "TEMP[%d]%s = variable %s (size %d)",
2280 n->Store->Index,
2281 _mesa_swizzle_string(n->Store->Swizzle, 0, GL_FALSE),
2282 (n->Var ? (char *) n->Var->a_name : "anonymous"),
2283 n->Store->Size);
2284 emit_comment(emitInfo, s);
2285 }
2286 return NULL;
2287 }
2288
2289
2290 /**
2291 * Emit code for a reference to a variable.
2292 * Actually, no code is generated but we may do some memory allocation.
2293 * In particular, state vars (uniforms) are allocated on an as-needed basis.
2294 */
2295 static struct prog_instruction *
2296 emit_var_ref(slang_emit_info *emitInfo, slang_ir_node *n)
2297 {
2298 assert(n->Store);
2299 assert(n->Store->File != PROGRAM_UNDEFINED);
2300
2301 if (n->Store->File == PROGRAM_STATE_VAR && n->Store->Index < 0) {
2302 GLboolean direct;
2303 GLint index = _slang_alloc_statevar(n, emitInfo->prog->Parameters, &direct);
2304 if (index < 0) {
2305 /* error */
2306 char s[100];
2307 /* XXX isn't this really an out of memory/resources error? */
2308 _mesa_snprintf(s, sizeof(s), "Undefined variable '%s'",
2309 (char *) n->Var->a_name);
2310 slang_info_log_error(emitInfo->log, s);
2311 return NULL;
2312 }
2313
2314 n->Store->Index = index;
2315 }
2316 else if (n->Store->File == PROGRAM_UNIFORM ||
2317 n->Store->File == PROGRAM_SAMPLER) {
2318 /* mark var as used */
2319 _mesa_use_uniform(emitInfo->prog->Parameters, (char *) n->Var->a_name);
2320 }
2321 else if (n->Store->File == PROGRAM_INPUT) {
2322 assert(n->Store->Index >= 0);
2323 /* geometry shaders have the input index in the second
2324 * index */
2325 if (emitInfo->prog->Target == MESA_GEOMETRY_PROGRAM &&
2326 n->Store->Is2D) {
2327 emitInfo->prog->InputsRead |= (1 << n->Store->Index2);
2328 } else
2329 emitInfo->prog->InputsRead |= (1 << n->Store->Index);
2330 }
2331
2332 if (n->Store->Index < 0) {
2333 /* probably ran out of registers */
2334 return NULL;
2335 }
2336 assert(n->Store->Size > 0);
2337
2338 return NULL;
2339 }
2340
2341
2342 static struct prog_instruction *
2343 emit(slang_emit_info *emitInfo, slang_ir_node *n)
2344 {
2345 struct prog_instruction *inst;
2346 if (!n)
2347 return NULL;
2348
2349 if (emitInfo->log->error_flag) {
2350 return NULL;
2351 }
2352
2353 if (n->Comment) {
2354 inst = new_instruction(emitInfo, OPCODE_NOP);
2355 if (inst) {
2356 inst->Comment = _mesa_strdup(n->Comment);
2357 }
2358 inst = NULL;
2359 }
2360
2361 switch (n->Opcode) {
2362 case IR_SEQ:
2363 /* sequence of two sub-trees */
2364 assert(n->Children[0]);
2365 assert(n->Children[1]);
2366 emit(emitInfo, n->Children[0]);
2367 if (emitInfo->log->error_flag)
2368 return NULL;
2369 inst = emit(emitInfo, n->Children[1]);
2370 #if 0
2371 assert(!n->Store);
2372 #endif
2373 if (n->Children[1]->Store)
2374 n->Store = n->Children[1]->Store;
2375 else
2376 n->Store = n->Children[0]->Store;
2377 return inst;
2378
2379 case IR_SCOPE:
2380 /* new variable scope */
2381 _slang_push_var_table(emitInfo->vt);
2382 inst = emit(emitInfo, n->Children[0]);
2383 _slang_pop_var_table(emitInfo->vt);
2384 n->Store = n->Children[0]->Store;
2385 return inst;
2386
2387 case IR_VAR_DECL:
2388 /* Variable declaration - allocate a register for it */
2389 inst = emit_var_decl(emitInfo, n);
2390 return inst;
2391
2392 case IR_VAR:
2393 /* Reference to a variable
2394 * Storage should have already been resolved/allocated.
2395 */
2396 return emit_var_ref(emitInfo, n);
2397
2398 case IR_ELEMENT:
2399 return emit_array_element(emitInfo, n);
2400 case IR_FIELD:
2401 return emit_struct_field(emitInfo, n);
2402 case IR_SWIZZLE:
2403 return emit_swizzle(emitInfo, n);
2404
2405 /* Simple arithmetic */
2406 /* unary */
2407 case IR_MOVE:
2408 case IR_RSQ:
2409 case IR_RCP:
2410 case IR_FLOOR:
2411 case IR_FRAC:
2412 case IR_F_TO_I:
2413 case IR_I_TO_F:
2414 case IR_ABS:
2415 case IR_SIN:
2416 case IR_COS:
2417 case IR_DDX:
2418 case IR_DDY:
2419 case IR_EXP:
2420 case IR_EXP2:
2421 case IR_LOG2:
2422 case IR_NOISE1:
2423 case IR_NOISE2:
2424 case IR_NOISE3:
2425 case IR_NOISE4:
2426 case IR_NRM4:
2427 case IR_NRM3:
2428 /* binary */
2429 case IR_ADD:
2430 case IR_SUB:
2431 case IR_MUL:
2432 case IR_DOT4:
2433 case IR_DOT3:
2434 case IR_DOT2:
2435 case IR_CROSS:
2436 case IR_MIN:
2437 case IR_MAX:
2438 case IR_SEQUAL:
2439 case IR_SNEQUAL:
2440 case IR_SGE:
2441 case IR_SGT:
2442 case IR_SLE:
2443 case IR_SLT:
2444 case IR_POW:
2445 /* trinary operators */
2446 case IR_LRP:
2447 case IR_CMP:
2448 return emit_arith(emitInfo, n);
2449
2450 case IR_EQUAL:
2451 case IR_NOTEQUAL:
2452 return emit_compare(emitInfo, n);
2453
2454 case IR_CLAMP:
2455 return emit_clamp(emitInfo, n);
2456 case IR_TEX:
2457 case IR_TEXB:
2458 case IR_TEXP:
2459 case IR_TEX_SH:
2460 case IR_TEXB_SH:
2461 case IR_TEXP_SH:
2462 return emit_tex(emitInfo, n);
2463 case IR_NEG:
2464 return emit_negation(emitInfo, n);
2465 case IR_FLOAT:
2466 /* find storage location for this float constant */
2467 n->Store->Index = _mesa_add_unnamed_constant(emitInfo->prog->Parameters,
2468 n->Value,
2469 n->Store->Size,
2470 &n->Store->Swizzle);
2471 if (n->Store->Index < 0) {
2472 slang_info_log_error(emitInfo->log, "Ran out of space for constants");
2473 return NULL;
2474 }
2475 return NULL;
2476
2477 case IR_COPY:
2478 return emit_copy(emitInfo, n);
2479
2480 case IR_COND:
2481 return emit_cond(emitInfo, n);
2482
2483 case IR_NOT:
2484 return emit_not(emitInfo, n);
2485
2486 case IR_LABEL:
2487 return emit_label(emitInfo, n);
2488
2489 case IR_KILL:
2490 return emit_kill(emitInfo);
2491
2492 case IR_CALL:
2493 /* new variable scope for subroutines/function calls */
2494 _slang_push_var_table(emitInfo->vt);
2495 inst = emit_fcall(emitInfo, n);
2496 _slang_pop_var_table(emitInfo->vt);
2497 return inst;
2498
2499 case IR_IF:
2500 return emit_if(emitInfo, n);
2501
2502 case IR_LOOP:
2503 return emit_loop(emitInfo, n);
2504 case IR_BREAK_IF_TRUE:
2505 case IR_CONT_IF_TRUE:
2506 return emit_cont_break_if_true(emitInfo, n);
2507 case IR_BREAK:
2508 /* fall-through */
2509 case IR_CONT:
2510 return emit_cont_break(emitInfo, n);
2511
2512 case IR_BEGIN_SUB:
2513 return new_instruction(emitInfo, OPCODE_BGNSUB);
2514 case IR_END_SUB:
2515 return new_instruction(emitInfo, OPCODE_ENDSUB);
2516 case IR_RETURN:
2517 return emit_return(emitInfo, n);
2518
2519 case IR_NOP:
2520 return NULL;
2521
2522 case IR_EMIT_VERTEX:
2523 return new_instruction(emitInfo, OPCODE_EMIT_VERTEX);
2524 case IR_END_PRIMITIVE:
2525 return new_instruction(emitInfo, OPCODE_END_PRIMITIVE);
2526
2527 default:
2528 _mesa_problem(NULL, "Unexpected IR opcode in emit()\n");
2529 }
2530 return NULL;
2531 }
2532
2533
2534 /**
2535 * After code generation, any subroutines will be in separate program
2536 * objects. This function appends all the subroutines onto the main
2537 * program and resolves the linking of all the branch/call instructions.
2538 * XXX this logic should really be part of the linking process...
2539 */
2540 static void
2541 _slang_resolve_subroutines(slang_emit_info *emitInfo)
2542 {
2543 GET_CURRENT_CONTEXT(ctx);
2544 struct gl_program *mainP = emitInfo->prog;
2545 GLuint *subroutineLoc, i, total;
2546
2547 subroutineLoc
2548 = (GLuint *) malloc(emitInfo->NumSubroutines * sizeof(GLuint));
2549
2550 /* total number of instructions */
2551 total = mainP->NumInstructions;
2552 for (i = 0; i < emitInfo->NumSubroutines; i++) {
2553 subroutineLoc[i] = total;
2554 total += emitInfo->Subroutines[i]->NumInstructions;
2555 }
2556
2557 /* adjust BranchTargets within the functions */
2558 for (i = 0; i < emitInfo->NumSubroutines; i++) {
2559 struct gl_program *sub = emitInfo->Subroutines[i];
2560 GLuint j;
2561 for (j = 0; j < sub->NumInstructions; j++) {
2562 struct prog_instruction *inst = sub->Instructions + j;
2563 if (inst->Opcode != OPCODE_CAL && inst->BranchTarget >= 0) {
2564 inst->BranchTarget += subroutineLoc[i];
2565 }
2566 }
2567 }
2568
2569 /* append subroutines' instructions after main's instructions */
2570 mainP->Instructions = _mesa_realloc_instructions(mainP->Instructions,
2571 mainP->NumInstructions,
2572 total);
2573 mainP->NumInstructions = total;
2574 for (i = 0; i < emitInfo->NumSubroutines; i++) {
2575 struct gl_program *sub = emitInfo->Subroutines[i];
2576 _mesa_copy_instructions(mainP->Instructions + subroutineLoc[i],
2577 sub->Instructions,
2578 sub->NumInstructions);
2579 /* delete subroutine code */
2580 sub->Parameters = NULL; /* prevent double-free */
2581 _mesa_reference_program(ctx, &emitInfo->Subroutines[i], NULL);
2582 }
2583
2584 /* free subroutine list */
2585 if (emitInfo->Subroutines) {
2586 free(emitInfo->Subroutines);
2587 emitInfo->Subroutines = NULL;
2588 }
2589 emitInfo->NumSubroutines = 0;
2590
2591 /* Examine CAL instructions.
2592 * At this point, the BranchTarget field of the CAL instruction is
2593 * the number/id of the subroutine to call (an index into the
2594 * emitInfo->Subroutines list).
2595 * Translate that into an actual instruction location now.
2596 */
2597 for (i = 0; i < mainP->NumInstructions; i++) {
2598 struct prog_instruction *inst = mainP->Instructions + i;
2599 if (inst->Opcode == OPCODE_CAL) {
2600 const GLuint f = inst->BranchTarget;
2601 inst->BranchTarget = subroutineLoc[f];
2602 }
2603 }
2604
2605 free(subroutineLoc);
2606 }
2607
2608
2609
2610 /**
2611 * Convert the IR tree into GPU instructions.
2612 * \param n root of IR tree
2613 * \param vt variable table
2614 * \param prog program to put GPU instructions into
2615 * \param pragmas controls codegen options
2616 * \param withEnd if true, emit END opcode at end
2617 * \param log log for emitting errors/warnings/info
2618 */
2619 GLboolean
2620 _slang_emit_code(slang_ir_node *n, slang_var_table *vt,
2621 struct gl_program *prog,
2622 const struct gl_sl_pragmas *pragmas,
2623 GLboolean withEnd,
2624 slang_info_log *log)
2625 {
2626 GET_CURRENT_CONTEXT(ctx);
2627 GLboolean success;
2628 slang_emit_info emitInfo;
2629 GLuint maxUniforms;
2630
2631 emitInfo.log = log;
2632 emitInfo.vt = vt;
2633 emitInfo.prog = prog;
2634 emitInfo.Subroutines = NULL;
2635 emitInfo.NumSubroutines = 0;
2636 emitInfo.MaxInstructions = prog->NumInstructions;
2637
2638 emitInfo.EmitHighLevelInstructions = ctx->Shader.EmitHighLevelInstructions;
2639 emitInfo.EmitCondCodes = ctx->Shader.EmitCondCodes;
2640 emitInfo.EmitComments = ctx->Shader.EmitComments || pragmas->Debug;
2641 emitInfo.EmitBeginEndSub = GL_TRUE;
2642
2643 if (!emitInfo.EmitCondCodes) {
2644 emitInfo.EmitHighLevelInstructions = GL_TRUE;
2645 }
2646
2647 /* Check uniform/constant limits */
2648 if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
2649 maxUniforms = ctx->Const.FragmentProgram.MaxUniformComponents / 4;
2650 }
2651 else if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
2652 maxUniforms = ctx->Const.VertexProgram.MaxUniformComponents / 4;
2653 } else {
2654 assert(prog->Target == MESA_GEOMETRY_PROGRAM);
2655 maxUniforms = ctx->Const.GeometryProgram.MaxUniformComponents / 4;
2656 }
2657 if (prog->Parameters->NumParameters > maxUniforms) {
2658 slang_info_log_error(log, "Constant/uniform register limit exceeded "
2659 "(max=%u vec4)", maxUniforms);
2660
2661 return GL_FALSE;
2662 }
2663
2664 (void) emit(&emitInfo, n);
2665
2666 /* finish up by adding the END opcode to program */
2667 if (withEnd) {
2668 struct prog_instruction *inst;
2669 inst = new_instruction(&emitInfo, OPCODE_END);
2670 if (!inst) {
2671 return GL_FALSE;
2672 }
2673 }
2674
2675 _slang_resolve_subroutines(&emitInfo);
2676
2677 success = GL_TRUE;
2678
2679 #if 0
2680 printf("*********** End emit code (%u inst):\n", prog->NumInstructions);
2681 _mesa_print_program(prog);
2682 _mesa_print_program_parameters(ctx,prog);
2683 #endif
2684
2685 return success;
2686 }