mesa: initial support for ARB_geometry_shader4
[mesa.git] / src / mesa / slang / slang_emit.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2005-2008 Brian Paul All Rights Reserved.
5 * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file slang_emit.c
27 * Emit program instructions (PI code) from IR trees.
28 * \author Brian Paul
29 */
30
31 /***
32 *** NOTES
33 ***
34 *** To emit GPU instructions, we basically just do an in-order traversal
35 *** of the IR tree.
36 ***/
37
38
39 #include "main/imports.h"
40 #include "main/context.h"
41 #include "program/program.h"
42 #include "program/prog_instruction.h"
43 #include "program/prog_parameter.h"
44 #include "program/prog_print.h"
45 #include "slang_builtin.h"
46 #include "slang_emit.h"
47 #include "slang_mem.h"
48
49
50 #define PEEPHOLE_OPTIMIZATIONS 1
51 #define ANNOTATE 0
52
53
54 typedef struct
55 {
56 slang_info_log *log;
57 slang_var_table *vt;
58 struct gl_program *prog;
59 struct gl_program **Subroutines;
60 GLuint NumSubroutines;
61
62 GLuint MaxInstructions; /**< size of prog->Instructions[] buffer */
63
64 GLboolean UnresolvedFunctions;
65
66 /* code-gen options */
67 GLboolean EmitHighLevelInstructions;
68 GLboolean EmitCondCodes;
69 GLboolean EmitComments;
70 GLboolean EmitBeginEndSub; /* XXX TEMPORARY */
71 } slang_emit_info;
72
73
74
75 static struct gl_program *
76 new_subroutine(slang_emit_info *emitInfo, GLuint *id)
77 {
78 GET_CURRENT_CONTEXT(ctx);
79 const GLuint n = emitInfo->NumSubroutines;
80
81 emitInfo->Subroutines = (struct gl_program **)
82 _mesa_realloc(emitInfo->Subroutines,
83 n * sizeof(struct gl_program *),
84 (n + 1) * sizeof(struct gl_program *));
85 emitInfo->Subroutines[n] = ctx->Driver.NewProgram(ctx, emitInfo->prog->Target, 0);
86 emitInfo->Subroutines[n]->Parameters = emitInfo->prog->Parameters;
87 emitInfo->NumSubroutines++;
88 *id = n;
89 return emitInfo->Subroutines[n];
90 }
91
92
93 /**
94 * Convert a writemask to a swizzle. Used for testing cond codes because
95 * we only want to test the cond code component(s) that was set by the
96 * previous instruction.
97 */
98 static GLuint
99 writemask_to_swizzle(GLuint writemask)
100 {
101 if (writemask == WRITEMASK_X)
102 return SWIZZLE_XXXX;
103 if (writemask == WRITEMASK_Y)
104 return SWIZZLE_YYYY;
105 if (writemask == WRITEMASK_Z)
106 return SWIZZLE_ZZZZ;
107 if (writemask == WRITEMASK_W)
108 return SWIZZLE_WWWW;
109 return SWIZZLE_XYZW; /* shouldn't be hit */
110 }
111
112
113 /**
114 * Convert a swizzle mask to a writemask.
115 * Note that the slang_ir_storage->Swizzle field can represent either a
116 * swizzle mask or a writemask, depending on how it's used. For example,
117 * when we parse "direction.yz" alone, we don't know whether .yz is a
118 * writemask or a swizzle. In this case, we encode ".yz" in store->Swizzle
119 * as a swizzle mask (.yz?? actually). Later, if direction.yz is used as
120 * an R-value, we use store->Swizzle as-is. Otherwise, if direction.yz is
121 * used as an L-value, we convert it to a writemask.
122 */
123 static GLuint
124 swizzle_to_writemask(GLuint swizzle)
125 {
126 GLuint i, writemask = 0x0;
127 for (i = 0; i < 4; i++) {
128 GLuint swz = GET_SWZ(swizzle, i);
129 if (swz <= SWIZZLE_W) {
130 writemask |= (1 << swz);
131 }
132 }
133 return writemask;
134 }
135
136
137 /**
138 * Swizzle a swizzle (function composition).
139 * That is, return swz2(swz1), or said another way: swz1.szw2
140 * Example: swizzle_swizzle(".zwxx", ".xxyw") yields ".zzwx"
141 */
142 GLuint
143 _slang_swizzle_swizzle(GLuint swz1, GLuint swz2)
144 {
145 GLuint i, swz, s[4];
146 for (i = 0; i < 4; i++) {
147 GLuint c = GET_SWZ(swz2, i);
148 if (c <= SWIZZLE_W)
149 s[i] = GET_SWZ(swz1, c);
150 else
151 s[i] = c;
152 }
153 swz = MAKE_SWIZZLE4(s[0], s[1], s[2], s[3]);
154 return swz;
155 }
156
157
158 /**
159 * Return the default swizzle mask for accessing a variable of the
160 * given size (in floats). If size = 1, comp is used to identify
161 * which component [0..3] of the register holds the variable.
162 */
163 GLuint
164 _slang_var_swizzle(GLint size, GLint comp)
165 {
166 switch (size) {
167 case 1:
168 return MAKE_SWIZZLE4(comp, SWIZZLE_NIL, SWIZZLE_NIL, SWIZZLE_NIL);
169 case 2:
170 return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_NIL, SWIZZLE_NIL);
171 case 3:
172 return MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_NIL);
173 default:
174 return SWIZZLE_XYZW;
175 }
176 }
177
178
179
180 /**
181 * Allocate storage for the given node (if it hasn't already been allocated).
182 *
183 * Typically this is temporary storage for an intermediate result (such as
184 * for a multiply or add, etc).
185 *
186 * If n->Store does not exist it will be created and will be of the size
187 * specified by defaultSize.
188 */
189 static GLboolean
190 alloc_node_storage(slang_emit_info *emitInfo, slang_ir_node *n,
191 GLint defaultSize)
192 {
193 assert(!n->Var);
194 if (!n->Store) {
195 assert(defaultSize > 0);
196 n->Store = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, defaultSize);
197 if (!n->Store) {
198 return GL_FALSE;
199 }
200 }
201
202 /* now allocate actual register(s). I.e. set n->Store->Index >= 0 */
203 if (n->Store->Index < 0) {
204 if (!_slang_alloc_temp(emitInfo->vt, n->Store)) {
205 slang_info_log_error(emitInfo->log,
206 "Ran out of registers, too many temporaries");
207 _slang_free(n->Store);
208 n->Store = NULL;
209 return GL_FALSE;
210 }
211 }
212 return GL_TRUE;
213 }
214
215
216 /**
217 * Free temporary storage, if n->Store is, in fact, temp storage.
218 * Otherwise, no-op.
219 */
220 static void
221 free_node_storage(slang_var_table *vt, slang_ir_node *n)
222 {
223 if (n->Store->File == PROGRAM_TEMPORARY &&
224 n->Store->Index >= 0 &&
225 n->Opcode != IR_SWIZZLE) {
226 if (_slang_is_temp(vt, n->Store)) {
227 _slang_free_temp(vt, n->Store);
228 n->Store->Index = -1;
229 n->Store = NULL; /* XXX this may not be needed */
230 }
231 }
232 }
233
234
235 /**
236 * Helper function to allocate a short-term temporary.
237 * Free it with _slang_free_temp().
238 */
239 static GLboolean
240 alloc_local_temp(slang_emit_info *emitInfo, slang_ir_storage *temp, GLint size)
241 {
242 assert(size >= 1);
243 assert(size <= 4);
244 memset(temp, 0, sizeof(*temp));
245 temp->Size = size;
246 temp->File = PROGRAM_TEMPORARY;
247 temp->Index = -1;
248 return _slang_alloc_temp(emitInfo->vt, temp);
249 }
250
251
252 /**
253 * Remove any SWIZZLE_NIL terms from given swizzle mask.
254 * For a swizzle like .z??? generate .zzzz (replicate single component).
255 * Else, for .wx?? generate .wxzw (insert default component for the position).
256 */
257 static GLuint
258 fix_swizzle(GLuint swizzle)
259 {
260 GLuint c0 = GET_SWZ(swizzle, 0),
261 c1 = GET_SWZ(swizzle, 1),
262 c2 = GET_SWZ(swizzle, 2),
263 c3 = GET_SWZ(swizzle, 3);
264 if (c1 == SWIZZLE_NIL && c2 == SWIZZLE_NIL && c3 == SWIZZLE_NIL) {
265 /* smear first component across all positions */
266 c1 = c2 = c3 = c0;
267 }
268 else {
269 /* insert default swizzle components */
270 if (c0 == SWIZZLE_NIL)
271 c0 = SWIZZLE_X;
272 if (c1 == SWIZZLE_NIL)
273 c1 = SWIZZLE_Y;
274 if (c2 == SWIZZLE_NIL)
275 c2 = SWIZZLE_Z;
276 if (c3 == SWIZZLE_NIL)
277 c3 = SWIZZLE_W;
278 }
279 return MAKE_SWIZZLE4(c0, c1, c2, c3);
280 }
281
282
283
284 /**
285 * Convert IR storage to an instruction dst register.
286 */
287 static void
288 storage_to_dst_reg(struct prog_dst_register *dst, const slang_ir_storage *st)
289 {
290 const GLboolean relAddr = st->RelAddr;
291 const GLint size = st->Size;
292 GLint index = st->Index;
293 GLuint swizzle = st->Swizzle;
294
295 assert(index >= 0);
296 /* if this is storage relative to some parent storage, walk up the tree */
297 while (st->Parent) {
298 st = st->Parent;
299 assert(st->Index >= 0);
300 index += st->Index;
301 swizzle = _slang_swizzle_swizzle(st->Swizzle, swizzle);
302 }
303
304 assert(st->File != PROGRAM_UNDEFINED);
305 dst->File = st->File;
306
307 assert(index >= 0);
308 dst->Index = index;
309
310 assert(size >= 1);
311 assert(size <= 4);
312
313 if (swizzle != SWIZZLE_XYZW) {
314 dst->WriteMask = swizzle_to_writemask(swizzle);
315 }
316 else {
317 switch (size) {
318 case 1:
319 dst->WriteMask = WRITEMASK_X << GET_SWZ(st->Swizzle, 0);
320 break;
321 case 2:
322 dst->WriteMask = WRITEMASK_XY;
323 break;
324 case 3:
325 dst->WriteMask = WRITEMASK_XYZ;
326 break;
327 case 4:
328 dst->WriteMask = WRITEMASK_XYZW;
329 break;
330 default:
331 ; /* error would have been caught above */
332 }
333 }
334
335 dst->RelAddr = relAddr;
336 }
337
338
339 /**
340 * Convert IR storage to an instruction src register.
341 */
342 static void
343 storage_to_src_reg(struct prog_src_register *src, const slang_ir_storage *st)
344 {
345 const GLboolean relAddr = st->RelAddr;
346 GLint index = st->Index;
347 GLuint swizzle = st->Swizzle;
348
349 /* if this is storage relative to some parent storage, walk up the tree */
350 assert(index >= 0);
351 while (st->Parent) {
352 st = st->Parent;
353 if (st->Index < 0) {
354 /* an error should have been reported already */
355 return;
356 }
357 assert(st->Index >= 0);
358 index += st->Index;
359 swizzle = _slang_swizzle_swizzle(fix_swizzle(st->Swizzle), swizzle);
360 }
361
362 assert(st->File >= 0);
363 #if 1 /* XXX temporary */
364 if (st->File == PROGRAM_UNDEFINED) {
365 slang_ir_storage *st0 = (slang_ir_storage *) st;
366 st0->File = PROGRAM_TEMPORARY;
367 }
368 #endif
369 assert(st->File < PROGRAM_FILE_MAX);
370 src->File = st->File;
371
372 assert(index >= 0);
373 src->Index = index;
374
375 swizzle = fix_swizzle(swizzle);
376 assert(GET_SWZ(swizzle, 0) <= SWIZZLE_W);
377 assert(GET_SWZ(swizzle, 1) <= SWIZZLE_W);
378 assert(GET_SWZ(swizzle, 2) <= SWIZZLE_W);
379 assert(GET_SWZ(swizzle, 3) <= SWIZZLE_W);
380 src->Swizzle = swizzle;
381
382 src->RelAddr = relAddr;
383 }
384
385
386 /*
387 * Setup storage pointing to a scalar constant/literal.
388 */
389 static void
390 constant_to_storage(slang_emit_info *emitInfo,
391 GLfloat val,
392 slang_ir_storage *store)
393 {
394 GLuint swizzle;
395 GLint reg;
396 GLfloat value[4];
397
398 value[0] = val;
399 reg = _mesa_add_unnamed_constant(emitInfo->prog->Parameters,
400 value, 1, &swizzle);
401
402 memset(store, 0, sizeof(*store));
403 store->File = PROGRAM_CONSTANT;
404 store->Index = reg;
405 store->Swizzle = swizzle;
406 }
407
408
409 /**
410 * Add new instruction at end of given program.
411 * \param prog the program to append instruction onto
412 * \param opcode opcode for the new instruction
413 * \return pointer to the new instruction
414 */
415 static struct prog_instruction *
416 new_instruction(slang_emit_info *emitInfo, gl_inst_opcode opcode)
417 {
418 struct gl_program *prog = emitInfo->prog;
419 struct prog_instruction *inst;
420
421 #if 0
422 /* print prev inst */
423 if (prog->NumInstructions > 0) {
424 _mesa_print_instruction(prog->Instructions + prog->NumInstructions - 1);
425 }
426 #endif
427 assert(prog->NumInstructions <= emitInfo->MaxInstructions);
428
429 if (prog->NumInstructions == emitInfo->MaxInstructions) {
430 /* grow the instruction buffer */
431 emitInfo->MaxInstructions += 20;
432 prog->Instructions =
433 _mesa_realloc_instructions(prog->Instructions,
434 prog->NumInstructions,
435 emitInfo->MaxInstructions);
436 if (!prog->Instructions) {
437 return NULL;
438 }
439 }
440
441 inst = prog->Instructions + prog->NumInstructions;
442 prog->NumInstructions++;
443 _mesa_init_instructions(inst, 1);
444 inst->Opcode = opcode;
445 inst->BranchTarget = -1; /* invalid */
446 /*
447 printf("New inst %d: %p %s\n", prog->NumInstructions-1,(void*)inst,
448 _mesa_opcode_string(inst->Opcode));
449 */
450 return inst;
451 }
452
453
454 static struct prog_instruction *
455 emit_arl_load(slang_emit_info *emitInfo,
456 gl_register_file file, GLint index, GLuint swizzle)
457 {
458 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_ARL);
459 if (inst) {
460 inst->SrcReg[0].File = file;
461 inst->SrcReg[0].Index = index;
462 inst->SrcReg[0].Swizzle = fix_swizzle(swizzle);
463 inst->DstReg.File = PROGRAM_ADDRESS;
464 inst->DstReg.Index = 0;
465 inst->DstReg.WriteMask = WRITEMASK_X;
466 }
467 return inst;
468 }
469
470
471 /**
472 * Emit a new instruction with given opcode, operands.
473 * At this point the instruction may have multiple indirect register
474 * loads/stores. We convert those into ARL loads and address-relative
475 * operands. See comments inside.
476 * At some point in the future we could directly emit indirectly addressed
477 * registers in Mesa GPU instructions.
478 */
479 static struct prog_instruction *
480 emit_instruction(slang_emit_info *emitInfo,
481 gl_inst_opcode opcode,
482 const slang_ir_storage *dst,
483 const slang_ir_storage *src0,
484 const slang_ir_storage *src1,
485 const slang_ir_storage *src2)
486 {
487 struct prog_instruction *inst;
488 GLuint numIndirect = 0;
489 const slang_ir_storage *src[3];
490 slang_ir_storage newSrc[3], newDst;
491 GLuint i;
492 GLboolean isTemp[3];
493
494 isTemp[0] = isTemp[1] = isTemp[2] = GL_FALSE;
495
496 src[0] = src0;
497 src[1] = src1;
498 src[2] = src2;
499
500 /* count up how many operands are indirect loads */
501 for (i = 0; i < 3; i++) {
502 if (src[i] && src[i]->IsIndirect)
503 numIndirect++;
504 }
505 if (dst && dst->IsIndirect)
506 numIndirect++;
507
508 /* Take special steps for indirect register loads.
509 * If we had multiple address registers this would be simpler.
510 * For example, this GLSL code:
511 * x[i] = y[j] + z[k];
512 * would translate into something like:
513 * ARL ADDR.x, i;
514 * ARL ADDR.y, j;
515 * ARL ADDR.z, k;
516 * ADD TEMP[ADDR.x+5], TEMP[ADDR.y+9], TEMP[ADDR.z+4];
517 * But since we currently only have one address register we have to do this:
518 * ARL ADDR.x, i;
519 * MOV t1, TEMP[ADDR.x+9];
520 * ARL ADDR.x, j;
521 * MOV t2, TEMP[ADDR.x+4];
522 * ARL ADDR.x, k;
523 * ADD TEMP[ADDR.x+5], t1, t2;
524 * The code here figures this out...
525 */
526 if (numIndirect > 0) {
527 for (i = 0; i < 3; i++) {
528 if (src[i] && src[i]->IsIndirect) {
529 /* load the ARL register with the indirect register */
530 emit_arl_load(emitInfo,
531 src[i]->IndirectFile,
532 src[i]->IndirectIndex,
533 src[i]->IndirectSwizzle);
534
535 if (numIndirect > 1) {
536 /* Need to load src[i] into a temporary register */
537 slang_ir_storage srcRelAddr;
538 alloc_local_temp(emitInfo, &newSrc[i], src[i]->Size);
539 isTemp[i] = GL_TRUE;
540
541 /* set RelAddr flag on src register */
542 srcRelAddr = *src[i];
543 srcRelAddr.RelAddr = GL_TRUE;
544 srcRelAddr.IsIndirect = GL_FALSE; /* not really needed */
545
546 /* MOV newSrc, srcRelAddr; */
547 inst = emit_instruction(emitInfo,
548 OPCODE_MOV,
549 &newSrc[i],
550 &srcRelAddr,
551 NULL,
552 NULL);
553 if (!inst) {
554 return NULL;
555 }
556
557 src[i] = &newSrc[i];
558 }
559 else {
560 /* just rewrite the src[i] storage to be ARL-relative */
561 newSrc[i] = *src[i];
562 newSrc[i].RelAddr = GL_TRUE;
563 newSrc[i].IsIndirect = GL_FALSE; /* not really needed */
564 src[i] = &newSrc[i];
565 }
566 }
567 }
568 }
569
570 /* Take special steps for indirect dest register write */
571 if (dst && dst->IsIndirect) {
572 /* load the ARL register with the indirect register */
573 emit_arl_load(emitInfo,
574 dst->IndirectFile,
575 dst->IndirectIndex,
576 dst->IndirectSwizzle);
577 newDst = *dst;
578 newDst.RelAddr = GL_TRUE;
579 newDst.IsIndirect = GL_FALSE;
580 dst = &newDst;
581 }
582
583 /* OK, emit the instruction and its dst, src regs */
584 inst = new_instruction(emitInfo, opcode);
585 if (!inst)
586 return NULL;
587
588 if (dst)
589 storage_to_dst_reg(&inst->DstReg, dst);
590
591 for (i = 0; i < 3; i++) {
592 if (src[i])
593 storage_to_src_reg(&inst->SrcReg[i], src[i]);
594 }
595
596 /* Free any temp registers that we allocated above */
597 for (i = 0; i < 3; i++) {
598 if (isTemp[i])
599 _slang_free_temp(emitInfo->vt, &newSrc[i]);
600 }
601
602 return inst;
603 }
604
605
606
607 /**
608 * Put a comment on the given instruction.
609 */
610 static void
611 inst_comment(struct prog_instruction *inst, const char *comment)
612 {
613 if (inst)
614 inst->Comment = _mesa_strdup(comment);
615 }
616
617
618
619 /**
620 * Return pointer to last instruction in program.
621 */
622 static struct prog_instruction *
623 prev_instruction(slang_emit_info *emitInfo)
624 {
625 struct gl_program *prog = emitInfo->prog;
626 if (prog->NumInstructions == 0)
627 return NULL;
628 else
629 return prog->Instructions + prog->NumInstructions - 1;
630 }
631
632
633 static struct prog_instruction *
634 emit(slang_emit_info *emitInfo, slang_ir_node *n);
635
636
637 /**
638 * Return an annotation string for given node's storage.
639 */
640 static char *
641 storage_annotation(const slang_ir_node *n, const struct gl_program *prog)
642 {
643 #if ANNOTATE
644 const slang_ir_storage *st = n->Store;
645 static char s[100] = "";
646
647 if (!st)
648 return _mesa_strdup("");
649
650 switch (st->File) {
651 case PROGRAM_CONSTANT:
652 if (st->Index >= 0) {
653 const GLfloat *val = prog->Parameters->ParameterValues[st->Index];
654 if (st->Swizzle == SWIZZLE_NOOP)
655 _mesa_snprintf(s, sizeof(s), "{%g, %g, %g, %g}", val[0], val[1], val[2], val[3]);
656 else {
657 _mesa_snprintf(s, sizeof(s), "%g", val[GET_SWZ(st->Swizzle, 0)]);
658 }
659 }
660 break;
661 case PROGRAM_TEMPORARY:
662 if (n->Var)
663 _mesa_snprintf(s, sizeof(s), "%s", (char *) n->Var->a_name);
664 else
665 _mesa_snprintf(s, sizeof(s), "t[%d]", st->Index);
666 break;
667 case PROGRAM_STATE_VAR:
668 case PROGRAM_UNIFORM:
669 _mesa_snprintf(s, sizeof(s), "%s", prog->Parameters->Parameters[st->Index].Name);
670 break;
671 case PROGRAM_VARYING:
672 _mesa_snprintf(s, sizeof(s), "%s", prog->Varying->Parameters[st->Index].Name);
673 break;
674 case PROGRAM_INPUT:
675 _mesa_snprintf(s, sizeof(s), "input[%d]", st->Index);
676 break;
677 case PROGRAM_OUTPUT:
678 _mesa_snprintf(s, sizeof(s), "output[%d]", st->Index);
679 break;
680 default:
681 s[0] = 0;
682 }
683 return _mesa_strdup(s);
684 #else
685 return NULL;
686 #endif
687 }
688
689
690 /**
691 * Return an annotation string for an instruction.
692 */
693 static char *
694 instruction_annotation(gl_inst_opcode opcode, char *dstAnnot,
695 char *srcAnnot0, char *srcAnnot1, char *srcAnnot2)
696 {
697 #if ANNOTATE
698 const char *operator;
699 char *s;
700 int len = 50;
701
702 if (dstAnnot)
703 len += strlen(dstAnnot);
704 else
705 dstAnnot = _mesa_strdup("");
706
707 if (srcAnnot0)
708 len += strlen(srcAnnot0);
709 else
710 srcAnnot0 = _mesa_strdup("");
711
712 if (srcAnnot1)
713 len += strlen(srcAnnot1);
714 else
715 srcAnnot1 = _mesa_strdup("");
716
717 if (srcAnnot2)
718 len += strlen(srcAnnot2);
719 else
720 srcAnnot2 = _mesa_strdup("");
721
722 switch (opcode) {
723 case OPCODE_ADD:
724 operator = "+";
725 break;
726 case OPCODE_SUB:
727 operator = "-";
728 break;
729 case OPCODE_MUL:
730 operator = "*";
731 break;
732 case OPCODE_DP2:
733 operator = "DP2";
734 break;
735 case OPCODE_DP3:
736 operator = "DP3";
737 break;
738 case OPCODE_DP4:
739 operator = "DP4";
740 break;
741 case OPCODE_XPD:
742 operator = "XPD";
743 break;
744 case OPCODE_RSQ:
745 operator = "RSQ";
746 break;
747 case OPCODE_SGT:
748 operator = ">";
749 break;
750 default:
751 operator = ",";
752 }
753
754 s = (char *) malloc(len);
755 _mesa_snprintf(s, len, "%s = %s %s %s %s", dstAnnot,
756 srcAnnot0, operator, srcAnnot1, srcAnnot2);
757
758 free(dstAnnot);
759 free(srcAnnot0);
760 free(srcAnnot1);
761 free(srcAnnot2);
762
763 return s;
764 #else
765 return NULL;
766 #endif
767 }
768
769
770 /**
771 * Emit an instruction that's just a comment.
772 */
773 static struct prog_instruction *
774 emit_comment(slang_emit_info *emitInfo, const char *comment)
775 {
776 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_NOP);
777 if (inst) {
778 inst_comment(inst, comment);
779 }
780 return inst;
781 }
782
783
784 /**
785 * Generate code for a simple arithmetic instruction.
786 * Either 1, 2 or 3 operands.
787 */
788 static struct prog_instruction *
789 emit_arith(slang_emit_info *emitInfo, slang_ir_node *n)
790 {
791 const slang_ir_info *info = _slang_ir_info(n->Opcode);
792 struct prog_instruction *inst;
793 GLuint i;
794
795 assert(info);
796 assert(info->InstOpcode != OPCODE_NOP);
797
798 #if PEEPHOLE_OPTIMIZATIONS
799 /* Look for MAD opportunity */
800 if (info->NumParams == 2 &&
801 n->Opcode == IR_ADD && n->Children[0]->Opcode == IR_MUL) {
802 /* found pattern IR_ADD(IR_MUL(A, B), C) */
803 emit(emitInfo, n->Children[0]->Children[0]); /* A */
804 emit(emitInfo, n->Children[0]->Children[1]); /* B */
805 emit(emitInfo, n->Children[1]); /* C */
806 if (!alloc_node_storage(emitInfo, n, -1)) { /* dest */
807 return NULL;
808 }
809
810 inst = emit_instruction(emitInfo,
811 OPCODE_MAD,
812 n->Store,
813 n->Children[0]->Children[0]->Store,
814 n->Children[0]->Children[1]->Store,
815 n->Children[1]->Store);
816
817 free_node_storage(emitInfo->vt, n->Children[0]->Children[0]);
818 free_node_storage(emitInfo->vt, n->Children[0]->Children[1]);
819 free_node_storage(emitInfo->vt, n->Children[1]);
820 return inst;
821 }
822
823 if (info->NumParams == 2 &&
824 n->Opcode == IR_ADD && n->Children[1]->Opcode == IR_MUL) {
825 /* found pattern IR_ADD(A, IR_MUL(B, C)) */
826 emit(emitInfo, n->Children[0]); /* A */
827 emit(emitInfo, n->Children[1]->Children[0]); /* B */
828 emit(emitInfo, n->Children[1]->Children[1]); /* C */
829 if (!alloc_node_storage(emitInfo, n, -1)) { /* dest */
830 return NULL;
831 }
832
833 inst = emit_instruction(emitInfo,
834 OPCODE_MAD,
835 n->Store,
836 n->Children[1]->Children[0]->Store,
837 n->Children[1]->Children[1]->Store,
838 n->Children[0]->Store);
839
840 free_node_storage(emitInfo->vt, n->Children[1]->Children[0]);
841 free_node_storage(emitInfo->vt, n->Children[1]->Children[1]);
842 free_node_storage(emitInfo->vt, n->Children[0]);
843 return inst;
844 }
845 #endif
846
847 /* gen code for children, may involve temp allocation */
848 for (i = 0; i < info->NumParams; i++) {
849 emit(emitInfo, n->Children[i]);
850 if (!n->Children[i] || !n->Children[i]->Store) {
851 /* error recovery */
852 return NULL;
853 }
854 }
855
856 /* result storage */
857 if (!alloc_node_storage(emitInfo, n, -1)) {
858 return NULL;
859 }
860
861 inst = emit_instruction(emitInfo,
862 info->InstOpcode,
863 n->Store, /* dest */
864 (info->NumParams > 0 ? n->Children[0]->Store : NULL),
865 (info->NumParams > 1 ? n->Children[1]->Store : NULL),
866 (info->NumParams > 2 ? n->Children[2]->Store : NULL)
867 );
868
869 /* free temps */
870 for (i = 0; i < info->NumParams; i++)
871 free_node_storage(emitInfo->vt, n->Children[i]);
872
873 return inst;
874 }
875
876
877 /**
878 * Emit code for == and != operators. These could normally be handled
879 * by emit_arith() except we need to be able to handle structure comparisons.
880 */
881 static struct prog_instruction *
882 emit_compare(slang_emit_info *emitInfo, slang_ir_node *n)
883 {
884 struct prog_instruction *inst = NULL;
885 GLint size;
886
887 assert(n->Opcode == IR_EQUAL || n->Opcode == IR_NOTEQUAL);
888
889 /* gen code for children */
890 emit(emitInfo, n->Children[0]);
891 emit(emitInfo, n->Children[1]);
892
893 if (n->Children[0]->Store->Size != n->Children[1]->Store->Size) {
894 /* XXX this error should have been caught in slang_codegen.c */
895 slang_info_log_error(emitInfo->log, "invalid operands to == or !=");
896 n->Store = NULL;
897 return NULL;
898 }
899
900 /* final result is 1 bool */
901 if (!alloc_node_storage(emitInfo, n, 1))
902 return NULL;
903
904 size = n->Children[0]->Store->Size;
905
906 if (size == 1) {
907 gl_inst_opcode opcode = n->Opcode == IR_EQUAL ? OPCODE_SEQ : OPCODE_SNE;
908 inst = emit_instruction(emitInfo,
909 opcode,
910 n->Store, /* dest */
911 n->Children[0]->Store,
912 n->Children[1]->Store,
913 NULL);
914 }
915 else if (size <= 4) {
916 /* compare two vectors.
917 * Unfortunately, there's no instruction to compare vectors and
918 * return a scalar result. Do it with some compare and dot product
919 * instructions...
920 */
921 GLuint swizzle;
922 gl_inst_opcode dotOp;
923 slang_ir_storage tempStore;
924
925 if (!alloc_local_temp(emitInfo, &tempStore, 4)) {
926 n->Store = NULL;
927 return NULL;
928 /* out of temps */
929 }
930
931 if (size == 4) {
932 dotOp = OPCODE_DP4;
933 swizzle = SWIZZLE_XYZW;
934 }
935 else if (size == 3) {
936 dotOp = OPCODE_DP3;
937 swizzle = SWIZZLE_XYZW;
938 }
939 else {
940 assert(size == 2);
941 dotOp = OPCODE_DP3; /* XXX use OPCODE_DP2 eventually */
942 swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y);
943 }
944
945 /* Compute inequality (temp = (A != B)) */
946 inst = emit_instruction(emitInfo,
947 OPCODE_SNE,
948 &tempStore,
949 n->Children[0]->Store,
950 n->Children[1]->Store,
951 NULL);
952 if (!inst) {
953 return NULL;
954 }
955 inst_comment(inst, "Compare values");
956
957 /* Compute val = DOT(temp, temp) (reduction) */
958 inst = emit_instruction(emitInfo,
959 dotOp,
960 n->Store,
961 &tempStore,
962 &tempStore,
963 NULL);
964 if (!inst) {
965 return NULL;
966 }
967 inst->SrcReg[0].Swizzle = inst->SrcReg[1].Swizzle = swizzle; /*override*/
968 inst_comment(inst, "Reduce vec to bool");
969
970 _slang_free_temp(emitInfo->vt, &tempStore); /* free temp */
971
972 if (n->Opcode == IR_EQUAL) {
973 /* compute val = !val.x with SEQ val, val, 0; */
974 slang_ir_storage zero;
975 constant_to_storage(emitInfo, 0.0, &zero);
976 inst = emit_instruction(emitInfo,
977 OPCODE_SEQ,
978 n->Store, /* dest */
979 n->Store,
980 &zero,
981 NULL);
982 if (!inst) {
983 return NULL;
984 }
985 inst_comment(inst, "Invert true/false");
986 }
987 }
988 else {
989 /* size > 4, struct or array compare.
990 * XXX this won't work reliably for structs with padding!!
991 */
992 GLint i, num = (n->Children[0]->Store->Size + 3) / 4;
993 slang_ir_storage accTemp, sneTemp;
994
995 if (!alloc_local_temp(emitInfo, &accTemp, 4))
996 return NULL;
997
998 if (!alloc_local_temp(emitInfo, &sneTemp, 4))
999 return NULL;
1000
1001 for (i = 0; i < num; i++) {
1002 slang_ir_storage srcStore0 = *n->Children[0]->Store;
1003 slang_ir_storage srcStore1 = *n->Children[1]->Store;
1004 srcStore0.Index += i;
1005 srcStore1.Index += i;
1006
1007 if (i == 0) {
1008 /* SNE accTemp, left[i], right[i] */
1009 inst = emit_instruction(emitInfo, OPCODE_SNE,
1010 &accTemp, /* dest */
1011 &srcStore0,
1012 &srcStore1,
1013 NULL);
1014 if (!inst) {
1015 return NULL;
1016 }
1017 inst_comment(inst, "Begin struct/array comparison");
1018 }
1019 else {
1020 /* SNE sneTemp, left[i], right[i] */
1021 inst = emit_instruction(emitInfo, OPCODE_SNE,
1022 &sneTemp, /* dest */
1023 &srcStore0,
1024 &srcStore1,
1025 NULL);
1026 if (!inst) {
1027 return NULL;
1028 }
1029 /* ADD accTemp, accTemp, sneTemp; # like logical-OR */
1030 inst = emit_instruction(emitInfo, OPCODE_ADD,
1031 &accTemp, /* dest */
1032 &accTemp,
1033 &sneTemp,
1034 NULL);
1035 if (!inst) {
1036 return NULL;
1037 }
1038 }
1039 }
1040
1041 /* compute accTemp.x || accTemp.y || accTemp.z || accTemp.w with DOT4 */
1042 inst = emit_instruction(emitInfo, OPCODE_DP4,
1043 n->Store,
1044 &accTemp,
1045 &accTemp,
1046 NULL);
1047 if (!inst) {
1048 return NULL;
1049 }
1050 inst_comment(inst, "End struct/array comparison");
1051
1052 if (n->Opcode == IR_EQUAL) {
1053 /* compute tmp.x = !tmp.x via tmp.x = (tmp.x == 0) */
1054 slang_ir_storage zero;
1055 constant_to_storage(emitInfo, 0.0, &zero);
1056 inst = emit_instruction(emitInfo, OPCODE_SEQ,
1057 n->Store, /* dest */
1058 n->Store,
1059 &zero,
1060 NULL);
1061 if (!inst) {
1062 return NULL;
1063 }
1064 inst_comment(inst, "Invert true/false");
1065 }
1066
1067 _slang_free_temp(emitInfo->vt, &accTemp);
1068 _slang_free_temp(emitInfo->vt, &sneTemp);
1069 }
1070
1071 /* free temps */
1072 free_node_storage(emitInfo->vt, n->Children[0]);
1073 free_node_storage(emitInfo->vt, n->Children[1]);
1074
1075 return inst;
1076 }
1077
1078
1079
1080 /**
1081 * Generate code for an IR_CLAMP instruction.
1082 */
1083 static struct prog_instruction *
1084 emit_clamp(slang_emit_info *emitInfo, slang_ir_node *n)
1085 {
1086 struct prog_instruction *inst;
1087 slang_ir_node tmpNode;
1088
1089 assert(n->Opcode == IR_CLAMP);
1090 /* ch[0] = value
1091 * ch[1] = min limit
1092 * ch[2] = max limit
1093 */
1094
1095 inst = emit(emitInfo, n->Children[0]);
1096
1097 /* If lower limit == 0.0 and upper limit == 1.0,
1098 * set prev instruction's SaturateMode field to SATURATE_ZERO_ONE.
1099 * Else,
1100 * emit OPCODE_MIN, OPCODE_MAX sequence.
1101 */
1102 #if 0
1103 /* XXX this isn't quite finished yet */
1104 if (n->Children[1]->Opcode == IR_FLOAT &&
1105 n->Children[1]->Value[0] == 0.0 &&
1106 n->Children[1]->Value[1] == 0.0 &&
1107 n->Children[1]->Value[2] == 0.0 &&
1108 n->Children[1]->Value[3] == 0.0 &&
1109 n->Children[2]->Opcode == IR_FLOAT &&
1110 n->Children[2]->Value[0] == 1.0 &&
1111 n->Children[2]->Value[1] == 1.0 &&
1112 n->Children[2]->Value[2] == 1.0 &&
1113 n->Children[2]->Value[3] == 1.0) {
1114 if (!inst) {
1115 inst = prev_instruction(prog);
1116 }
1117 if (inst && inst->Opcode != OPCODE_NOP) {
1118 /* and prev instruction's DstReg matches n->Children[0]->Store */
1119 inst->SaturateMode = SATURATE_ZERO_ONE;
1120 n->Store = n->Children[0]->Store;
1121 return inst;
1122 }
1123 }
1124 #else
1125 (void) inst;
1126 #endif
1127
1128 if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size))
1129 return NULL;
1130
1131 emit(emitInfo, n->Children[1]);
1132 emit(emitInfo, n->Children[2]);
1133
1134 /* Some GPUs don't allow reading from output registers. So if the
1135 * dest for this clamp() is an output reg, we can't use that reg for
1136 * the intermediate result. Use a temp register instead.
1137 */
1138 memset(&tmpNode, 0, sizeof(tmpNode));
1139 if (!alloc_node_storage(emitInfo, &tmpNode, n->Store->Size)) {
1140 return NULL;
1141 }
1142
1143 /* tmp = max(ch[0], ch[1]) */
1144 inst = emit_instruction(emitInfo, OPCODE_MAX,
1145 tmpNode.Store, /* dest */
1146 n->Children[0]->Store,
1147 n->Children[1]->Store,
1148 NULL);
1149 if (!inst) {
1150 return NULL;
1151 }
1152
1153 /* n->dest = min(tmp, ch[2]) */
1154 inst = emit_instruction(emitInfo, OPCODE_MIN,
1155 n->Store, /* dest */
1156 tmpNode.Store,
1157 n->Children[2]->Store,
1158 NULL);
1159
1160 free_node_storage(emitInfo->vt, &tmpNode);
1161
1162 return inst;
1163 }
1164
1165
1166 static struct prog_instruction *
1167 emit_negation(slang_emit_info *emitInfo, slang_ir_node *n)
1168 {
1169 /* Implement as MOV dst, -src; */
1170 /* XXX we could look at the previous instruction and in some circumstances
1171 * modify it to accomplish the negation.
1172 */
1173 struct prog_instruction *inst;
1174
1175 emit(emitInfo, n->Children[0]);
1176
1177 if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size))
1178 return NULL;
1179
1180 inst = emit_instruction(emitInfo,
1181 OPCODE_MOV,
1182 n->Store, /* dest */
1183 n->Children[0]->Store,
1184 NULL,
1185 NULL);
1186 if (inst) {
1187 inst->SrcReg[0].Negate = NEGATE_XYZW;
1188 }
1189 return inst;
1190 }
1191
1192
1193 static struct prog_instruction *
1194 emit_label(slang_emit_info *emitInfo, const slang_ir_node *n)
1195 {
1196 assert(n->Label);
1197 #if 0
1198 /* XXX this fails in loop tail code - investigate someday */
1199 assert(_slang_label_get_location(n->Label) < 0);
1200 _slang_label_set_location(n->Label, emitInfo->prog->NumInstructions,
1201 emitInfo->prog);
1202 #else
1203 if (_slang_label_get_location(n->Label) < 0)
1204 _slang_label_set_location(n->Label, emitInfo->prog->NumInstructions,
1205 emitInfo->prog);
1206 #endif
1207 return NULL;
1208 }
1209
1210
1211 /**
1212 * Emit code for a function call.
1213 * Note that for each time a function is called, we emit the function's
1214 * body code again because the set of available registers may be different.
1215 */
1216 static struct prog_instruction *
1217 emit_fcall(slang_emit_info *emitInfo, slang_ir_node *n)
1218 {
1219 struct gl_program *progSave;
1220 struct prog_instruction *inst;
1221 GLuint subroutineId;
1222 GLuint maxInstSave;
1223
1224 assert(n->Opcode == IR_CALL);
1225 assert(n->Label);
1226
1227 /* save/push cur program */
1228 maxInstSave = emitInfo->MaxInstructions;
1229 progSave = emitInfo->prog;
1230
1231 emitInfo->prog = new_subroutine(emitInfo, &subroutineId);
1232 emitInfo->MaxInstructions = emitInfo->prog->NumInstructions;
1233
1234 _slang_label_set_location(n->Label, emitInfo->prog->NumInstructions,
1235 emitInfo->prog);
1236
1237 if (emitInfo->EmitBeginEndSub) {
1238 /* BGNSUB isn't a real instruction.
1239 * We require a label (i.e. "foobar:") though, if we're going to
1240 * print the program in the NV format. The BNGSUB instruction is
1241 * really just a NOP to attach the label to.
1242 */
1243 inst = new_instruction(emitInfo, OPCODE_BGNSUB);
1244 if (!inst) {
1245 return NULL;
1246 }
1247 inst_comment(inst, n->Label->Name);
1248 }
1249
1250 /* body of function: */
1251 emit(emitInfo, n->Children[0]);
1252 n->Store = n->Children[0]->Store;
1253
1254 /* add RET instruction now, if needed */
1255 inst = prev_instruction(emitInfo);
1256 if (inst && inst->Opcode != OPCODE_RET) {
1257 inst = new_instruction(emitInfo, OPCODE_RET);
1258 if (!inst) {
1259 return NULL;
1260 }
1261 }
1262
1263 if (emitInfo->EmitBeginEndSub) {
1264 inst = new_instruction(emitInfo, OPCODE_ENDSUB);
1265 if (!inst) {
1266 return NULL;
1267 }
1268 inst_comment(inst, n->Label->Name);
1269 }
1270
1271 /* pop/restore cur program */
1272 emitInfo->prog = progSave;
1273 emitInfo->MaxInstructions = maxInstSave;
1274
1275 /* emit the function call */
1276 inst = new_instruction(emitInfo, OPCODE_CAL);
1277 if (!inst) {
1278 return NULL;
1279 }
1280 /* The branch target is just the subroutine number (changed later) */
1281 inst->BranchTarget = subroutineId;
1282 inst_comment(inst, n->Label->Name);
1283 assert(inst->BranchTarget >= 0);
1284
1285 return inst;
1286 }
1287
1288
1289 /**
1290 * Emit code for a 'return' statement.
1291 */
1292 static struct prog_instruction *
1293 emit_return(slang_emit_info *emitInfo, slang_ir_node *n)
1294 {
1295 struct prog_instruction *inst;
1296 assert(n);
1297 assert(n->Opcode == IR_RETURN);
1298 assert(n->Label);
1299 inst = new_instruction(emitInfo, OPCODE_RET);
1300 if (inst) {
1301 inst->DstReg.CondMask = COND_TR; /* always return */
1302 }
1303 return inst;
1304 }
1305
1306
1307 static struct prog_instruction *
1308 emit_kill(slang_emit_info *emitInfo)
1309 {
1310 struct gl_fragment_program *fp;
1311 struct prog_instruction *inst;
1312 /* NV-KILL - discard fragment depending on condition code.
1313 * Note that ARB-KILL depends on sign of vector operand.
1314 */
1315 inst = new_instruction(emitInfo, OPCODE_KIL_NV);
1316 if (!inst) {
1317 return NULL;
1318 }
1319 inst->DstReg.CondMask = COND_TR; /* always kill */
1320
1321 assert(emitInfo->prog->Target == GL_FRAGMENT_PROGRAM_ARB);
1322 fp = (struct gl_fragment_program *) emitInfo->prog;
1323 fp->UsesKill = GL_TRUE;
1324
1325 return inst;
1326 }
1327
1328
1329 static struct prog_instruction *
1330 emit_tex(slang_emit_info *emitInfo, slang_ir_node *n)
1331 {
1332 struct prog_instruction *inst;
1333 gl_inst_opcode opcode;
1334 GLboolean shadow = GL_FALSE;
1335
1336 switch (n->Opcode) {
1337 case IR_TEX:
1338 opcode = OPCODE_TEX;
1339 break;
1340 case IR_TEX_SH:
1341 opcode = OPCODE_TEX;
1342 shadow = GL_TRUE;
1343 break;
1344 case IR_TEXB:
1345 opcode = OPCODE_TXB;
1346 break;
1347 case IR_TEXB_SH:
1348 opcode = OPCODE_TXB;
1349 shadow = GL_TRUE;
1350 break;
1351 case IR_TEXP:
1352 opcode = OPCODE_TXP;
1353 break;
1354 case IR_TEXP_SH:
1355 opcode = OPCODE_TXP;
1356 shadow = GL_TRUE;
1357 break;
1358 default:
1359 _mesa_problem(NULL, "Bad IR TEX code");
1360 return NULL;
1361 }
1362
1363 if (n->Children[0]->Opcode == IR_ELEMENT) {
1364 /* array is the sampler (a uniform which'll indicate the texture unit) */
1365 assert(n->Children[0]->Children[0]->Store);
1366 assert(n->Children[0]->Children[0]->Store->File == PROGRAM_SAMPLER);
1367
1368 emit(emitInfo, n->Children[0]);
1369
1370 n->Children[0]->Var = n->Children[0]->Children[0]->Var;
1371 } else {
1372 /* this is the sampler (a uniform which'll indicate the texture unit) */
1373 assert(n->Children[0]->Store);
1374 assert(n->Children[0]->Store->File == PROGRAM_SAMPLER);
1375 }
1376
1377 /* emit code for the texcoord operand */
1378 (void) emit(emitInfo, n->Children[1]);
1379
1380 /* alloc storage for result of texture fetch */
1381 if (!alloc_node_storage(emitInfo, n, 4))
1382 return NULL;
1383
1384 /* emit TEX instruction; Child[1] is the texcoord */
1385 inst = emit_instruction(emitInfo,
1386 opcode,
1387 n->Store,
1388 n->Children[1]->Store,
1389 NULL,
1390 NULL);
1391 if (!inst) {
1392 return NULL;
1393 }
1394
1395 inst->TexShadow = shadow;
1396
1397 /* Store->Index is the uniform/sampler index */
1398 assert(n->Children[0]->Store->Index >= 0);
1399 inst->TexSrcUnit = n->Children[0]->Store->Index;
1400 inst->TexSrcTarget = n->Children[0]->Store->TexTarget;
1401
1402 /* mark the sampler as being used */
1403 _mesa_use_uniform(emitInfo->prog->Parameters,
1404 (char *) n->Children[0]->Var->a_name);
1405
1406 return inst;
1407 }
1408
1409
1410 /**
1411 * Assignment/copy
1412 */
1413 static struct prog_instruction *
1414 emit_copy(slang_emit_info *emitInfo, slang_ir_node *n)
1415 {
1416 struct prog_instruction *inst;
1417
1418 assert(n->Opcode == IR_COPY);
1419
1420 /* lhs */
1421 emit(emitInfo, n->Children[0]);
1422 if (!n->Children[0]->Store || n->Children[0]->Store->Index < 0) {
1423 /* an error should have been already recorded */
1424 return NULL;
1425 }
1426
1427 /* rhs */
1428 assert(n->Children[1]);
1429 inst = emit(emitInfo, n->Children[1]);
1430
1431 if (!n->Children[1]->Store || n->Children[1]->Store->Index < 0) {
1432 if (!emitInfo->log->text && !emitInfo->UnresolvedFunctions) {
1433 /* XXX this error should have been caught in slang_codegen.c */
1434 slang_info_log_error(emitInfo->log, "invalid assignment");
1435 }
1436 return NULL;
1437 }
1438
1439 assert(n->Children[1]->Store->Index >= 0);
1440
1441 /*assert(n->Children[0]->Store->Size == n->Children[1]->Store->Size);*/
1442
1443 n->Store = n->Children[0]->Store;
1444
1445 if (n->Store->File == PROGRAM_SAMPLER) {
1446 /* no code generated for sampler assignments,
1447 * just copy the sampler index/target at compile time.
1448 */
1449 n->Store->Index = n->Children[1]->Store->Index;
1450 n->Store->TexTarget = n->Children[1]->Store->TexTarget;
1451 return NULL;
1452 }
1453
1454 #if PEEPHOLE_OPTIMIZATIONS
1455 if (inst &&
1456 (n->Children[1]->Opcode != IR_SWIZZLE) &&
1457 _slang_is_temp(emitInfo->vt, n->Children[1]->Store) &&
1458 (inst->DstReg.File == n->Children[1]->Store->File) &&
1459 (inst->DstReg.Index == n->Children[1]->Store->Index) &&
1460 !n->Children[0]->Store->IsIndirect &&
1461 n->Children[0]->Store->Size <= 4) {
1462 /* Peephole optimization:
1463 * The Right-Hand-Side has its results in a temporary place.
1464 * Modify the RHS (and the prev instruction) to store its results
1465 * in the destination specified by n->Children[0].
1466 * Then, this MOVE is a no-op.
1467 * Ex:
1468 * MUL tmp, x, y;
1469 * MOV a, tmp;
1470 * becomes:
1471 * MUL a, x, y;
1472 */
1473
1474 /* fixup the previous instruction (which stored the RHS result) */
1475 assert(n->Children[0]->Store->Index >= 0);
1476 storage_to_dst_reg(&inst->DstReg, n->Children[0]->Store);
1477 return inst;
1478 }
1479 else
1480 #endif
1481 {
1482 if (n->Children[0]->Store->Size > 4) {
1483 /* move matrix/struct etc (block of registers) */
1484 slang_ir_storage dstStore = *n->Children[0]->Store;
1485 slang_ir_storage srcStore = *n->Children[1]->Store;
1486 GLint size = srcStore.Size;
1487 ASSERT(n->Children[1]->Store->Swizzle == SWIZZLE_NOOP);
1488 dstStore.Size = 4;
1489 srcStore.Size = 4;
1490 while (size >= 4) {
1491 inst = emit_instruction(emitInfo, OPCODE_MOV,
1492 &dstStore,
1493 &srcStore,
1494 NULL,
1495 NULL);
1496 if (!inst) {
1497 return NULL;
1498 }
1499 inst_comment(inst, "IR_COPY block");
1500 srcStore.Index++;
1501 dstStore.Index++;
1502 size -= 4;
1503 }
1504 }
1505 else {
1506 /* single register move */
1507 char *srcAnnot, *dstAnnot;
1508 assert(n->Children[0]->Store->Index >= 0);
1509 inst = emit_instruction(emitInfo, OPCODE_MOV,
1510 n->Children[0]->Store, /* dest */
1511 n->Children[1]->Store,
1512 NULL,
1513 NULL);
1514 if (!inst) {
1515 return NULL;
1516 }
1517 dstAnnot = storage_annotation(n->Children[0], emitInfo->prog);
1518 srcAnnot = storage_annotation(n->Children[1], emitInfo->prog);
1519 inst->Comment = instruction_annotation(inst->Opcode, dstAnnot,
1520 srcAnnot, NULL, NULL);
1521 }
1522 free_node_storage(emitInfo->vt, n->Children[1]);
1523 return inst;
1524 }
1525 }
1526
1527
1528 /**
1529 * An IR_COND node wraps a boolean expression which is used by an
1530 * IF or WHILE test. This is where we'll set condition codes, if needed.
1531 */
1532 static struct prog_instruction *
1533 emit_cond(slang_emit_info *emitInfo, slang_ir_node *n)
1534 {
1535 struct prog_instruction *inst;
1536
1537 assert(n->Opcode == IR_COND);
1538
1539 if (!n->Children[0])
1540 return NULL;
1541
1542 /* emit code for the expression */
1543 inst = emit(emitInfo, n->Children[0]);
1544
1545 if (!n->Children[0]->Store) {
1546 /* error recovery */
1547 return NULL;
1548 }
1549
1550 assert(n->Children[0]->Store);
1551 /*assert(n->Children[0]->Store->Size == 1);*/
1552
1553 if (emitInfo->EmitCondCodes) {
1554 if (inst &&
1555 n->Children[0]->Store &&
1556 inst->DstReg.File == n->Children[0]->Store->File &&
1557 inst->DstReg.Index == n->Children[0]->Store->Index) {
1558 /* The previous instruction wrote to the register who's value
1559 * we're testing. Just fix that instruction so that the
1560 * condition codes are computed.
1561 */
1562 inst->CondUpdate = GL_TRUE;
1563 n->Store = n->Children[0]->Store;
1564 return inst;
1565 }
1566 else {
1567 /* This'll happen for things like "if (i) ..." where no code
1568 * is normally generated for the expression "i".
1569 * Generate a move instruction just to set condition codes.
1570 */
1571 if (!alloc_node_storage(emitInfo, n, 1))
1572 return NULL;
1573 inst = emit_instruction(emitInfo, OPCODE_MOV,
1574 n->Store, /* dest */
1575 n->Children[0]->Store,
1576 NULL,
1577 NULL);
1578 if (!inst) {
1579 return NULL;
1580 }
1581 inst->CondUpdate = GL_TRUE;
1582 inst_comment(inst, "COND expr");
1583 _slang_free_temp(emitInfo->vt, n->Store);
1584 return inst;
1585 }
1586 }
1587 else {
1588 /* No-op: the boolean result of the expression is in a regular reg */
1589 n->Store = n->Children[0]->Store;
1590 return inst;
1591 }
1592 }
1593
1594
1595 /**
1596 * Logical-NOT
1597 */
1598 static struct prog_instruction *
1599 emit_not(slang_emit_info *emitInfo, slang_ir_node *n)
1600 {
1601 static const struct {
1602 gl_inst_opcode op, opNot;
1603 } operators[] = {
1604 { OPCODE_SLT, OPCODE_SGE },
1605 { OPCODE_SLE, OPCODE_SGT },
1606 { OPCODE_SGT, OPCODE_SLE },
1607 { OPCODE_SGE, OPCODE_SLT },
1608 { OPCODE_SEQ, OPCODE_SNE },
1609 { OPCODE_SNE, OPCODE_SEQ },
1610 { 0, 0 }
1611 };
1612 struct prog_instruction *inst;
1613 slang_ir_storage zero;
1614 GLuint i;
1615
1616 /* child expr */
1617 inst = emit(emitInfo, n->Children[0]);
1618
1619 #if PEEPHOLE_OPTIMIZATIONS
1620 if (inst) {
1621 /* if the prev instruction was a comparison instruction, invert it */
1622 for (i = 0; operators[i].op; i++) {
1623 if (inst->Opcode == operators[i].op) {
1624 inst->Opcode = operators[i].opNot;
1625 n->Store = n->Children[0]->Store;
1626 return inst;
1627 }
1628 }
1629 }
1630 #endif
1631
1632 /* else, invert using SEQ (v = v == 0) */
1633 if (!alloc_node_storage(emitInfo, n, n->Children[0]->Store->Size))
1634 return NULL;
1635
1636 constant_to_storage(emitInfo, 0.0, &zero);
1637 inst = emit_instruction(emitInfo,
1638 OPCODE_SEQ,
1639 n->Store,
1640 n->Children[0]->Store,
1641 &zero,
1642 NULL);
1643 if (!inst) {
1644 return NULL;
1645 }
1646 inst_comment(inst, "NOT");
1647
1648 free_node_storage(emitInfo->vt, n->Children[0]);
1649
1650 return inst;
1651 }
1652
1653
1654 static struct prog_instruction *
1655 emit_if(slang_emit_info *emitInfo, slang_ir_node *n)
1656 {
1657 struct gl_program *prog = emitInfo->prog;
1658 GLuint ifInstLoc, elseInstLoc = 0;
1659 GLuint condWritemask = 0;
1660
1661 /* emit condition expression code */
1662 {
1663 struct prog_instruction *inst;
1664 inst = emit(emitInfo, n->Children[0]);
1665 if (emitInfo->EmitCondCodes) {
1666 if (!inst) {
1667 /* error recovery */
1668 return NULL;
1669 }
1670 condWritemask = inst->DstReg.WriteMask;
1671 }
1672 }
1673
1674 if (!n->Children[0]->Store)
1675 return NULL;
1676
1677 #if 0
1678 assert(n->Children[0]->Store->Size == 1); /* a bool! */
1679 #endif
1680
1681 ifInstLoc = prog->NumInstructions;
1682 if (emitInfo->EmitHighLevelInstructions) {
1683 if (emitInfo->EmitCondCodes) {
1684 /* IF condcode THEN ... */
1685 struct prog_instruction *ifInst = new_instruction(emitInfo, OPCODE_IF);
1686 if (!ifInst) {
1687 return NULL;
1688 }
1689 ifInst->DstReg.CondMask = COND_NE; /* if cond is non-zero */
1690 /* only test the cond code (1 of 4) that was updated by the
1691 * previous instruction.
1692 */
1693 ifInst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1694 }
1695 else {
1696 struct prog_instruction *inst;
1697
1698 /* IF src[0] THEN ... */
1699 inst = emit_instruction(emitInfo, OPCODE_IF,
1700 NULL, /* dst */
1701 n->Children[0]->Store, /* op0 */
1702 NULL,
1703 NULL);
1704 if (!inst) {
1705 return NULL;
1706 }
1707 }
1708 }
1709 else {
1710 /* conditional jump to else, or endif */
1711 struct prog_instruction *ifInst = new_instruction(emitInfo, OPCODE_BRA);
1712 if (!ifInst) {
1713 return NULL;
1714 }
1715 ifInst->DstReg.CondMask = COND_EQ; /* BRA if cond is zero */
1716 inst_comment(ifInst, "if zero");
1717 ifInst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1718 }
1719
1720 /* if body */
1721 emit(emitInfo, n->Children[1]);
1722
1723 if (n->Children[2]) {
1724 /* have else body */
1725 elseInstLoc = prog->NumInstructions;
1726 if (emitInfo->EmitHighLevelInstructions) {
1727 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_ELSE);
1728 if (!inst) {
1729 return NULL;
1730 }
1731 prog->Instructions[ifInstLoc].BranchTarget = prog->NumInstructions - 1;
1732 }
1733 else {
1734 /* jump to endif instruction */
1735 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_BRA);
1736 if (!inst) {
1737 return NULL;
1738 }
1739 inst_comment(inst, "else");
1740 inst->DstReg.CondMask = COND_TR; /* always branch */
1741 prog->Instructions[ifInstLoc].BranchTarget = prog->NumInstructions;
1742 }
1743 emit(emitInfo, n->Children[2]);
1744 }
1745 else {
1746 /* no else body */
1747 prog->Instructions[ifInstLoc].BranchTarget = prog->NumInstructions;
1748 }
1749
1750 if (emitInfo->EmitHighLevelInstructions) {
1751 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_ENDIF);
1752 if (!inst) {
1753 return NULL;
1754 }
1755 }
1756
1757 if (elseInstLoc) {
1758 /* point ELSE instruction BranchTarget at ENDIF */
1759 if (emitInfo->EmitHighLevelInstructions) {
1760 prog->Instructions[elseInstLoc].BranchTarget = prog->NumInstructions - 1;
1761 }
1762 else {
1763 prog->Instructions[elseInstLoc].BranchTarget = prog->NumInstructions;
1764 }
1765 }
1766 return NULL;
1767 }
1768
1769
1770 static struct prog_instruction *
1771 emit_loop(slang_emit_info *emitInfo, slang_ir_node *n)
1772 {
1773 struct gl_program *prog = emitInfo->prog;
1774 struct prog_instruction *endInst;
1775 GLuint beginInstLoc, tailInstLoc, endInstLoc;
1776 slang_ir_node *ir;
1777
1778 /* emit OPCODE_BGNLOOP */
1779 beginInstLoc = prog->NumInstructions;
1780 if (emitInfo->EmitHighLevelInstructions) {
1781 struct prog_instruction *inst = new_instruction(emitInfo, OPCODE_BGNLOOP);
1782 if (!inst) {
1783 return NULL;
1784 }
1785 }
1786
1787 /* body */
1788 emit(emitInfo, n->Children[0]);
1789
1790 /* tail */
1791 tailInstLoc = prog->NumInstructions;
1792 if (n->Children[1]) {
1793 if (emitInfo->EmitComments)
1794 emit_comment(emitInfo, "Loop tail code:");
1795 emit(emitInfo, n->Children[1]);
1796 }
1797
1798 endInstLoc = prog->NumInstructions;
1799 if (emitInfo->EmitHighLevelInstructions) {
1800 /* emit OPCODE_ENDLOOP */
1801 endInst = new_instruction(emitInfo, OPCODE_ENDLOOP);
1802 if (!endInst) {
1803 return NULL;
1804 }
1805 }
1806 else {
1807 /* emit unconditional BRA-nch */
1808 endInst = new_instruction(emitInfo, OPCODE_BRA);
1809 if (!endInst) {
1810 return NULL;
1811 }
1812 endInst->DstReg.CondMask = COND_TR; /* always true */
1813 }
1814 /* ENDLOOP's BranchTarget points to the BGNLOOP inst */
1815 endInst->BranchTarget = beginInstLoc;
1816
1817 if (emitInfo->EmitHighLevelInstructions) {
1818 /* BGNLOOP's BranchTarget points to the ENDLOOP inst */
1819 prog->Instructions[beginInstLoc].BranchTarget = prog->NumInstructions -1;
1820 }
1821
1822 /* Done emitting loop code. Now walk over the loop's linked list of
1823 * BREAK and CONT nodes, filling in their BranchTarget fields (which
1824 * will point to the corresponding ENDLOOP instruction.
1825 */
1826 for (ir = n->List; ir; ir = ir->List) {
1827 struct prog_instruction *inst = prog->Instructions + ir->InstLocation;
1828 assert(inst->BranchTarget < 0);
1829 if (ir->Opcode == IR_BREAK ||
1830 ir->Opcode == IR_BREAK_IF_TRUE) {
1831 assert(inst->Opcode == OPCODE_BRK ||
1832 inst->Opcode == OPCODE_BRA);
1833 /* go to instruction at end of loop */
1834 if (emitInfo->EmitHighLevelInstructions) {
1835 inst->BranchTarget = endInstLoc;
1836 }
1837 else {
1838 inst->BranchTarget = endInstLoc + 1;
1839 }
1840 }
1841 else {
1842 assert(ir->Opcode == IR_CONT ||
1843 ir->Opcode == IR_CONT_IF_TRUE);
1844 assert(inst->Opcode == OPCODE_CONT ||
1845 inst->Opcode == OPCODE_BRA);
1846 /* go to instruction at tail of loop */
1847 inst->BranchTarget = endInstLoc;
1848 }
1849 }
1850 return NULL;
1851 }
1852
1853
1854 /**
1855 * Unconditional "continue" or "break" statement.
1856 * Either OPCODE_CONT, OPCODE_BRK or OPCODE_BRA will be emitted.
1857 */
1858 static struct prog_instruction *
1859 emit_cont_break(slang_emit_info *emitInfo, slang_ir_node *n)
1860 {
1861 gl_inst_opcode opcode;
1862 struct prog_instruction *inst;
1863
1864 if (n->Opcode == IR_CONT) {
1865 /* we need to execute the loop's tail code before doing CONT */
1866 assert(n->Parent);
1867 assert(n->Parent->Opcode == IR_LOOP);
1868 if (n->Parent->Children[1]) {
1869 /* emit tail code */
1870 if (emitInfo->EmitComments) {
1871 emit_comment(emitInfo, "continue - tail code:");
1872 }
1873 emit(emitInfo, n->Parent->Children[1]);
1874 }
1875 }
1876
1877 /* opcode selection */
1878 if (emitInfo->EmitHighLevelInstructions) {
1879 opcode = (n->Opcode == IR_CONT) ? OPCODE_CONT : OPCODE_BRK;
1880 }
1881 else {
1882 opcode = OPCODE_BRA;
1883 }
1884 n->InstLocation = emitInfo->prog->NumInstructions;
1885 inst = new_instruction(emitInfo, opcode);
1886 if (inst) {
1887 inst->DstReg.CondMask = COND_TR; /* always true */
1888 }
1889 return inst;
1890 }
1891
1892
1893 /**
1894 * Conditional "continue" or "break" statement.
1895 * Either OPCODE_CONT, OPCODE_BRK or OPCODE_BRA will be emitted.
1896 */
1897 static struct prog_instruction *
1898 emit_cont_break_if_true(slang_emit_info *emitInfo, slang_ir_node *n)
1899 {
1900 struct prog_instruction *inst;
1901
1902 assert(n->Opcode == IR_CONT_IF_TRUE ||
1903 n->Opcode == IR_BREAK_IF_TRUE);
1904
1905 /* evaluate condition expr, setting cond codes */
1906 inst = emit(emitInfo, n->Children[0]);
1907 if (emitInfo->EmitCondCodes) {
1908 assert(inst);
1909 inst->CondUpdate = GL_TRUE;
1910 }
1911
1912 n->InstLocation = emitInfo->prog->NumInstructions;
1913
1914 /* opcode selection */
1915 if (emitInfo->EmitHighLevelInstructions) {
1916 const gl_inst_opcode opcode
1917 = (n->Opcode == IR_CONT_IF_TRUE) ? OPCODE_CONT : OPCODE_BRK;
1918 if (emitInfo->EmitCondCodes) {
1919 /* Get the writemask from the previous instruction which set
1920 * the condcodes. Use that writemask as the CondSwizzle.
1921 */
1922 const GLuint condWritemask = inst->DstReg.WriteMask;
1923 inst = new_instruction(emitInfo, opcode);
1924 if (inst) {
1925 inst->DstReg.CondMask = COND_NE;
1926 inst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1927 }
1928 return inst;
1929 }
1930 else {
1931 /* IF reg
1932 * BRK/CONT;
1933 * ENDIF
1934 */
1935 GLint ifInstLoc;
1936 ifInstLoc = emitInfo->prog->NumInstructions;
1937 inst = emit_instruction(emitInfo, OPCODE_IF,
1938 NULL, /* dest */
1939 n->Children[0]->Store,
1940 NULL,
1941 NULL);
1942 if (!inst) {
1943 return NULL;
1944 }
1945 n->InstLocation = emitInfo->prog->NumInstructions;
1946
1947 inst = new_instruction(emitInfo, opcode);
1948 if (!inst) {
1949 return NULL;
1950 }
1951 inst = new_instruction(emitInfo, OPCODE_ENDIF);
1952 if (!inst) {
1953 return NULL;
1954 }
1955
1956 emitInfo->prog->Instructions[ifInstLoc].BranchTarget
1957 = emitInfo->prog->NumInstructions - 1;
1958 return inst;
1959 }
1960 }
1961 else {
1962 const GLuint condWritemask = inst->DstReg.WriteMask;
1963 assert(emitInfo->EmitCondCodes);
1964 inst = new_instruction(emitInfo, OPCODE_BRA);
1965 if (inst) {
1966 inst->DstReg.CondMask = COND_NE;
1967 inst->DstReg.CondSwizzle = writemask_to_swizzle(condWritemask);
1968 }
1969 return inst;
1970 }
1971 }
1972
1973
1974 /**
1975 * Return the size of a swizzle mask given that some swizzle components
1976 * may be NIL/undefined. For example:
1977 * swizzle_size(".zzxx") = 4
1978 * swizzle_size(".xy??") = 2
1979 * swizzle_size(".w???") = 1
1980 */
1981 static GLuint
1982 swizzle_size(GLuint swizzle)
1983 {
1984 GLuint i;
1985 for (i = 0; i < 4; i++) {
1986 if (GET_SWZ(swizzle, i) == SWIZZLE_NIL)
1987 return i;
1988 }
1989 return 4;
1990 }
1991
1992
1993 static struct prog_instruction *
1994 emit_swizzle(slang_emit_info *emitInfo, slang_ir_node *n)
1995 {
1996 struct prog_instruction *inst;
1997
1998 inst = emit(emitInfo, n->Children[0]);
1999
2000 if (!n->Store->Parent) {
2001 /* this covers a case such as "(b ? p : q).x" */
2002 n->Store->Parent = n->Children[0]->Store;
2003 assert(n->Store->Parent);
2004 }
2005
2006 {
2007 const GLuint swizzle = n->Store->Swizzle;
2008 /* new storage is parent storage with updated Swizzle + Size fields */
2009 _slang_copy_ir_storage(n->Store, n->Store->Parent);
2010 /* Apply this node's swizzle to parent's storage */
2011 n->Store->Swizzle = _slang_swizzle_swizzle(n->Store->Swizzle, swizzle);
2012 /* Update size */
2013 n->Store->Size = swizzle_size(n->Store->Swizzle);
2014 }
2015
2016 assert(!n->Store->Parent);
2017 assert(n->Store->Index >= 0);
2018
2019 return inst;
2020 }
2021
2022
2023 /**
2024 * Dereference array element: element == array[index]
2025 * This basically involves emitting code for computing the array index
2026 * and updating the node/element's storage info.
2027 */
2028 static struct prog_instruction *
2029 emit_array_element(slang_emit_info *emitInfo, slang_ir_node *n)
2030 {
2031 slang_ir_storage *arrayStore, *indexStore;
2032 const int elemSize = n->Store->Size; /* number of floats */
2033 const GLint elemSizeVec = (elemSize + 3) / 4; /* number of vec4 */
2034 struct prog_instruction *inst;
2035
2036 assert(n->Opcode == IR_ELEMENT);
2037 assert(elemSize > 0);
2038
2039 /* special case for built-in state variables, like light state */
2040 {
2041 slang_ir_storage *root = n->Store;
2042 assert(!root->Parent);
2043 while (root->Parent)
2044 root = root->Parent;
2045
2046 if (root->File == PROGRAM_STATE_VAR) {
2047 GLboolean direct;
2048 GLint index =
2049 _slang_alloc_statevar(n, emitInfo->prog->Parameters, &direct);
2050 if (index < 0) {
2051 /* error */
2052 return NULL;
2053 }
2054 if (direct) {
2055 n->Store->Index = index;
2056 return NULL; /* all done */
2057 }
2058 }
2059 }
2060
2061 /* do codegen for array itself */
2062 emit(emitInfo, n->Children[0]);
2063 arrayStore = n->Children[0]->Store;
2064
2065 /* The initial array element storage is the array's storage,
2066 * then modified below.
2067 */
2068 _slang_copy_ir_storage(n->Store, arrayStore);
2069
2070
2071 if (n->Children[1]->Opcode == IR_FLOAT) {
2072 /* Constant array index */
2073 const GLint element = (GLint) n->Children[1]->Value[0];
2074
2075 /* this element's storage is the array's storage, plus constant offset */
2076 n->Store->Index += elemSizeVec * element;
2077 }
2078 else {
2079 /* Variable array index */
2080
2081 /* do codegen for array index expression */
2082 emit(emitInfo, n->Children[1]);
2083 indexStore = n->Children[1]->Store;
2084
2085 if (indexStore->IsIndirect) {
2086 /* need to put the array index into a temporary since we can't
2087 * directly support a[b[i]] constructs.
2088 */
2089
2090
2091 /*indexStore = tempstore();*/
2092 }
2093
2094
2095 if (elemSize > 4) {
2096 /* need to multiply array index by array element size */
2097 struct prog_instruction *inst;
2098 slang_ir_storage *indexTemp;
2099 slang_ir_storage elemSizeStore;
2100
2101 /* allocate 1 float indexTemp */
2102 indexTemp = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, 1);
2103 _slang_alloc_temp(emitInfo->vt, indexTemp);
2104
2105 /* allocate a constant containing the element size */
2106 constant_to_storage(emitInfo, (float) elemSizeVec, &elemSizeStore);
2107
2108 /* multiply array index by element size */
2109 inst = emit_instruction(emitInfo,
2110 OPCODE_MUL,
2111 indexTemp, /* dest */
2112 indexStore, /* the index */
2113 &elemSizeStore,
2114 NULL);
2115 if (!inst) {
2116 return NULL;
2117 }
2118
2119 indexStore = indexTemp;
2120 }
2121
2122 if (arrayStore->IsIndirect) {
2123 /* ex: in a[i][j], a[i] (the arrayStore) is indirect */
2124 /* Need to add indexStore to arrayStore->Indirect store */
2125 slang_ir_storage indirectArray;
2126 slang_ir_storage *indexTemp;
2127
2128 _slang_init_ir_storage(&indirectArray,
2129 arrayStore->IndirectFile,
2130 arrayStore->IndirectIndex,
2131 1,
2132 arrayStore->IndirectSwizzle);
2133
2134 /* allocate 1 float indexTemp */
2135 indexTemp = _slang_new_ir_storage(PROGRAM_TEMPORARY, -1, 1);
2136 _slang_alloc_temp(emitInfo->vt, indexTemp);
2137
2138 inst = emit_instruction(emitInfo,
2139 OPCODE_ADD,
2140 indexTemp, /* dest */
2141 indexStore, /* the index */
2142 &indirectArray, /* indirect array base */
2143 NULL);
2144 if (!inst) {
2145 return NULL;
2146 }
2147
2148 indexStore = indexTemp;
2149 }
2150
2151 /* update the array element storage info */
2152 n->Store->IsIndirect = GL_TRUE;
2153 n->Store->IndirectFile = indexStore->File;
2154 n->Store->IndirectIndex = indexStore->Index;
2155 n->Store->IndirectSwizzle = indexStore->Swizzle;
2156 }
2157
2158 n->Store->Size = elemSize;
2159 n->Store->Swizzle = _slang_var_swizzle(elemSize, 0);
2160
2161 return NULL; /* no instruction */
2162 }
2163
2164
2165 /**
2166 * Resolve storage for accessing a structure field.
2167 */
2168 static struct prog_instruction *
2169 emit_struct_field(slang_emit_info *emitInfo, slang_ir_node *n)
2170 {
2171 slang_ir_storage *root = n->Store;
2172 GLint fieldOffset, fieldSize;
2173
2174 assert(n->Opcode == IR_FIELD);
2175
2176 assert(!root->Parent);
2177 while (root->Parent)
2178 root = root->Parent;
2179
2180 /* If this is the field of a state var, allocate constant/uniform
2181 * storage for it now if we haven't already.
2182 * Note that we allocate storage (uniform/constant slots) for state
2183 * variables here rather than at declaration time so we only allocate
2184 * space for the ones that we actually use!
2185 */
2186 if (root->File == PROGRAM_STATE_VAR) {
2187 GLboolean direct;
2188 GLint index = _slang_alloc_statevar(n, emitInfo->prog->Parameters, &direct);
2189 if (index < 0) {
2190 slang_info_log_error(emitInfo->log, "Error parsing state variable");
2191 return NULL;
2192 }
2193 if (direct) {
2194 root->Index = index;
2195 return NULL; /* all done */
2196 }
2197 }
2198
2199 /* do codegen for struct */
2200 emit(emitInfo, n->Children[0]);
2201 assert(n->Children[0]->Store->Index >= 0);
2202
2203
2204 fieldOffset = n->Store->Index;
2205 fieldSize = n->Store->Size;
2206
2207 _slang_copy_ir_storage(n->Store, n->Children[0]->Store);
2208
2209 n->Store->Index = n->Children[0]->Store->Index + fieldOffset / 4;
2210 n->Store->Size = fieldSize;
2211
2212 switch (fieldSize) {
2213 case 1:
2214 {
2215 GLint swz = fieldOffset % 4;
2216 n->Store->Swizzle = MAKE_SWIZZLE4(swz, swz, swz, swz);
2217 }
2218 break;
2219 case 2:
2220 n->Store->Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
2221 SWIZZLE_NIL, SWIZZLE_NIL);
2222 break;
2223 case 3:
2224 n->Store->Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
2225 SWIZZLE_Z, SWIZZLE_NIL);
2226 break;
2227 default:
2228 n->Store->Swizzle = SWIZZLE_XYZW;
2229 }
2230
2231 assert(n->Store->Index >= 0);
2232
2233 return NULL; /* no instruction */
2234 }
2235
2236
2237 /**
2238 * Emit code for a variable declaration.
2239 * This usually doesn't result in any code generation, but just
2240 * memory allocation.
2241 */
2242 static struct prog_instruction *
2243 emit_var_decl(slang_emit_info *emitInfo, slang_ir_node *n)
2244 {
2245 assert(n->Store);
2246 assert(n->Store->File != PROGRAM_UNDEFINED);
2247 assert(n->Store->Size > 0);
2248 /*assert(n->Store->Index < 0);*/
2249
2250 if (!n->Var || n->Var->isTemp) {
2251 /* a nameless/temporary variable, will be freed after first use */
2252 /*NEW*/
2253 if (n->Store->Index < 0 && !_slang_alloc_temp(emitInfo->vt, n->Store)) {
2254 slang_info_log_error(emitInfo->log,
2255 "Ran out of registers, too many temporaries");
2256 return NULL;
2257 }
2258 }
2259 else {
2260 /* a regular variable */
2261 _slang_add_variable(emitInfo->vt, n->Var);
2262 if (!_slang_alloc_var(emitInfo->vt, n->Store)) {
2263 slang_info_log_error(emitInfo->log,
2264 "Ran out of registers, too many variables");
2265 return NULL;
2266 }
2267 /*
2268 printf("IR_VAR_DECL %s %d store %p\n",
2269 (char*) n->Var->a_name, n->Store->Index, (void*) n->Store);
2270 */
2271 assert(n->Var->store == n->Store);
2272 }
2273 if (emitInfo->EmitComments) {
2274 /* emit NOP with comment describing the variable's storage location */
2275 char s[1000];
2276 _mesa_snprintf(s, sizeof(s), "TEMP[%d]%s = variable %s (size %d)",
2277 n->Store->Index,
2278 _mesa_swizzle_string(n->Store->Swizzle, 0, GL_FALSE),
2279 (n->Var ? (char *) n->Var->a_name : "anonymous"),
2280 n->Store->Size);
2281 emit_comment(emitInfo, s);
2282 }
2283 return NULL;
2284 }
2285
2286
2287 /**
2288 * Emit code for a reference to a variable.
2289 * Actually, no code is generated but we may do some memory allocation.
2290 * In particular, state vars (uniforms) are allocated on an as-needed basis.
2291 */
2292 static struct prog_instruction *
2293 emit_var_ref(slang_emit_info *emitInfo, slang_ir_node *n)
2294 {
2295 assert(n->Store);
2296 assert(n->Store->File != PROGRAM_UNDEFINED);
2297
2298 if (n->Store->File == PROGRAM_STATE_VAR && n->Store->Index < 0) {
2299 GLboolean direct;
2300 GLint index = _slang_alloc_statevar(n, emitInfo->prog->Parameters, &direct);
2301 if (index < 0) {
2302 /* error */
2303 char s[100];
2304 /* XXX isn't this really an out of memory/resources error? */
2305 _mesa_snprintf(s, sizeof(s), "Undefined variable '%s'",
2306 (char *) n->Var->a_name);
2307 slang_info_log_error(emitInfo->log, s);
2308 return NULL;
2309 }
2310
2311 n->Store->Index = index;
2312 }
2313 else if (n->Store->File == PROGRAM_UNIFORM ||
2314 n->Store->File == PROGRAM_SAMPLER) {
2315 /* mark var as used */
2316 _mesa_use_uniform(emitInfo->prog->Parameters, (char *) n->Var->a_name);
2317 }
2318 else if (n->Store->File == PROGRAM_INPUT) {
2319 assert(n->Store->Index >= 0);
2320 emitInfo->prog->InputsRead |= (1 << n->Store->Index);
2321 }
2322
2323 if (n->Store->Index < 0) {
2324 /* probably ran out of registers */
2325 return NULL;
2326 }
2327 assert(n->Store->Size > 0);
2328
2329 return NULL;
2330 }
2331
2332
2333 static struct prog_instruction *
2334 emit(slang_emit_info *emitInfo, slang_ir_node *n)
2335 {
2336 struct prog_instruction *inst;
2337 if (!n)
2338 return NULL;
2339
2340 if (emitInfo->log->error_flag) {
2341 return NULL;
2342 }
2343
2344 if (n->Comment) {
2345 inst = new_instruction(emitInfo, OPCODE_NOP);
2346 if (inst) {
2347 inst->Comment = _mesa_strdup(n->Comment);
2348 }
2349 inst = NULL;
2350 }
2351
2352 switch (n->Opcode) {
2353 case IR_SEQ:
2354 /* sequence of two sub-trees */
2355 assert(n->Children[0]);
2356 assert(n->Children[1]);
2357 emit(emitInfo, n->Children[0]);
2358 if (emitInfo->log->error_flag)
2359 return NULL;
2360 inst = emit(emitInfo, n->Children[1]);
2361 #if 0
2362 assert(!n->Store);
2363 #endif
2364 n->Store = n->Children[1]->Store;
2365 return inst;
2366
2367 case IR_SCOPE:
2368 /* new variable scope */
2369 _slang_push_var_table(emitInfo->vt);
2370 inst = emit(emitInfo, n->Children[0]);
2371 _slang_pop_var_table(emitInfo->vt);
2372 return inst;
2373
2374 case IR_VAR_DECL:
2375 /* Variable declaration - allocate a register for it */
2376 inst = emit_var_decl(emitInfo, n);
2377 return inst;
2378
2379 case IR_VAR:
2380 /* Reference to a variable
2381 * Storage should have already been resolved/allocated.
2382 */
2383 return emit_var_ref(emitInfo, n);
2384
2385 case IR_ELEMENT:
2386 return emit_array_element(emitInfo, n);
2387 case IR_FIELD:
2388 return emit_struct_field(emitInfo, n);
2389 case IR_SWIZZLE:
2390 return emit_swizzle(emitInfo, n);
2391
2392 /* Simple arithmetic */
2393 /* unary */
2394 case IR_MOVE:
2395 case IR_RSQ:
2396 case IR_RCP:
2397 case IR_FLOOR:
2398 case IR_FRAC:
2399 case IR_F_TO_I:
2400 case IR_I_TO_F:
2401 case IR_ABS:
2402 case IR_SIN:
2403 case IR_COS:
2404 case IR_DDX:
2405 case IR_DDY:
2406 case IR_EXP:
2407 case IR_EXP2:
2408 case IR_LOG2:
2409 case IR_NOISE1:
2410 case IR_NOISE2:
2411 case IR_NOISE3:
2412 case IR_NOISE4:
2413 case IR_NRM4:
2414 case IR_NRM3:
2415 /* binary */
2416 case IR_ADD:
2417 case IR_SUB:
2418 case IR_MUL:
2419 case IR_DOT4:
2420 case IR_DOT3:
2421 case IR_DOT2:
2422 case IR_CROSS:
2423 case IR_MIN:
2424 case IR_MAX:
2425 case IR_SEQUAL:
2426 case IR_SNEQUAL:
2427 case IR_SGE:
2428 case IR_SGT:
2429 case IR_SLE:
2430 case IR_SLT:
2431 case IR_POW:
2432 /* trinary operators */
2433 case IR_LRP:
2434 case IR_CMP:
2435 return emit_arith(emitInfo, n);
2436
2437 case IR_EQUAL:
2438 case IR_NOTEQUAL:
2439 return emit_compare(emitInfo, n);
2440
2441 case IR_CLAMP:
2442 return emit_clamp(emitInfo, n);
2443 case IR_TEX:
2444 case IR_TEXB:
2445 case IR_TEXP:
2446 case IR_TEX_SH:
2447 case IR_TEXB_SH:
2448 case IR_TEXP_SH:
2449 return emit_tex(emitInfo, n);
2450 case IR_NEG:
2451 return emit_negation(emitInfo, n);
2452 case IR_FLOAT:
2453 /* find storage location for this float constant */
2454 n->Store->Index = _mesa_add_unnamed_constant(emitInfo->prog->Parameters,
2455 n->Value,
2456 n->Store->Size,
2457 &n->Store->Swizzle);
2458 if (n->Store->Index < 0) {
2459 slang_info_log_error(emitInfo->log, "Ran out of space for constants");
2460 return NULL;
2461 }
2462 return NULL;
2463
2464 case IR_COPY:
2465 return emit_copy(emitInfo, n);
2466
2467 case IR_COND:
2468 return emit_cond(emitInfo, n);
2469
2470 case IR_NOT:
2471 return emit_not(emitInfo, n);
2472
2473 case IR_LABEL:
2474 return emit_label(emitInfo, n);
2475
2476 case IR_KILL:
2477 return emit_kill(emitInfo);
2478
2479 case IR_CALL:
2480 /* new variable scope for subroutines/function calls */
2481 _slang_push_var_table(emitInfo->vt);
2482 inst = emit_fcall(emitInfo, n);
2483 _slang_pop_var_table(emitInfo->vt);
2484 return inst;
2485
2486 case IR_IF:
2487 return emit_if(emitInfo, n);
2488
2489 case IR_LOOP:
2490 return emit_loop(emitInfo, n);
2491 case IR_BREAK_IF_TRUE:
2492 case IR_CONT_IF_TRUE:
2493 return emit_cont_break_if_true(emitInfo, n);
2494 case IR_BREAK:
2495 /* fall-through */
2496 case IR_CONT:
2497 return emit_cont_break(emitInfo, n);
2498
2499 case IR_BEGIN_SUB:
2500 return new_instruction(emitInfo, OPCODE_BGNSUB);
2501 case IR_END_SUB:
2502 return new_instruction(emitInfo, OPCODE_ENDSUB);
2503 case IR_RETURN:
2504 return emit_return(emitInfo, n);
2505
2506 case IR_NOP:
2507 return NULL;
2508
2509 case IR_EMIT_VERTEX:
2510 return new_instruction(emitInfo, OPCODE_EMIT_VERTEX);
2511 case IR_END_PRIMITIVE:
2512 return new_instruction(emitInfo, OPCODE_END_PRIMITIVE);
2513
2514 default:
2515 _mesa_problem(NULL, "Unexpected IR opcode in emit()\n");
2516 }
2517 return NULL;
2518 }
2519
2520
2521 /**
2522 * After code generation, any subroutines will be in separate program
2523 * objects. This function appends all the subroutines onto the main
2524 * program and resolves the linking of all the branch/call instructions.
2525 * XXX this logic should really be part of the linking process...
2526 */
2527 static void
2528 _slang_resolve_subroutines(slang_emit_info *emitInfo)
2529 {
2530 GET_CURRENT_CONTEXT(ctx);
2531 struct gl_program *mainP = emitInfo->prog;
2532 GLuint *subroutineLoc, i, total;
2533
2534 subroutineLoc
2535 = (GLuint *) malloc(emitInfo->NumSubroutines * sizeof(GLuint));
2536
2537 /* total number of instructions */
2538 total = mainP->NumInstructions;
2539 for (i = 0; i < emitInfo->NumSubroutines; i++) {
2540 subroutineLoc[i] = total;
2541 total += emitInfo->Subroutines[i]->NumInstructions;
2542 }
2543
2544 /* adjust BranchTargets within the functions */
2545 for (i = 0; i < emitInfo->NumSubroutines; i++) {
2546 struct gl_program *sub = emitInfo->Subroutines[i];
2547 GLuint j;
2548 for (j = 0; j < sub->NumInstructions; j++) {
2549 struct prog_instruction *inst = sub->Instructions + j;
2550 if (inst->Opcode != OPCODE_CAL && inst->BranchTarget >= 0) {
2551 inst->BranchTarget += subroutineLoc[i];
2552 }
2553 }
2554 }
2555
2556 /* append subroutines' instructions after main's instructions */
2557 mainP->Instructions = _mesa_realloc_instructions(mainP->Instructions,
2558 mainP->NumInstructions,
2559 total);
2560 mainP->NumInstructions = total;
2561 for (i = 0; i < emitInfo->NumSubroutines; i++) {
2562 struct gl_program *sub = emitInfo->Subroutines[i];
2563 _mesa_copy_instructions(mainP->Instructions + subroutineLoc[i],
2564 sub->Instructions,
2565 sub->NumInstructions);
2566 /* delete subroutine code */
2567 sub->Parameters = NULL; /* prevent double-free */
2568 _mesa_reference_program(ctx, &emitInfo->Subroutines[i], NULL);
2569 }
2570
2571 /* free subroutine list */
2572 if (emitInfo->Subroutines) {
2573 free(emitInfo->Subroutines);
2574 emitInfo->Subroutines = NULL;
2575 }
2576 emitInfo->NumSubroutines = 0;
2577
2578 /* Examine CAL instructions.
2579 * At this point, the BranchTarget field of the CAL instruction is
2580 * the number/id of the subroutine to call (an index into the
2581 * emitInfo->Subroutines list).
2582 * Translate that into an actual instruction location now.
2583 */
2584 for (i = 0; i < mainP->NumInstructions; i++) {
2585 struct prog_instruction *inst = mainP->Instructions + i;
2586 if (inst->Opcode == OPCODE_CAL) {
2587 const GLuint f = inst->BranchTarget;
2588 inst->BranchTarget = subroutineLoc[f];
2589 }
2590 }
2591
2592 free(subroutineLoc);
2593 }
2594
2595
2596
2597 /**
2598 * Convert the IR tree into GPU instructions.
2599 * \param n root of IR tree
2600 * \param vt variable table
2601 * \param prog program to put GPU instructions into
2602 * \param pragmas controls codegen options
2603 * \param withEnd if true, emit END opcode at end
2604 * \param log log for emitting errors/warnings/info
2605 */
2606 GLboolean
2607 _slang_emit_code(slang_ir_node *n, slang_var_table *vt,
2608 struct gl_program *prog,
2609 const struct gl_sl_pragmas *pragmas,
2610 GLboolean withEnd,
2611 slang_info_log *log)
2612 {
2613 GET_CURRENT_CONTEXT(ctx);
2614 GLboolean success;
2615 slang_emit_info emitInfo;
2616 GLuint maxUniforms;
2617
2618 emitInfo.log = log;
2619 emitInfo.vt = vt;
2620 emitInfo.prog = prog;
2621 emitInfo.Subroutines = NULL;
2622 emitInfo.NumSubroutines = 0;
2623 emitInfo.MaxInstructions = prog->NumInstructions;
2624
2625 emitInfo.EmitHighLevelInstructions = ctx->Shader.EmitHighLevelInstructions;
2626 emitInfo.EmitCondCodes = ctx->Shader.EmitCondCodes;
2627 emitInfo.EmitComments = ctx->Shader.EmitComments || pragmas->Debug;
2628 emitInfo.EmitBeginEndSub = GL_TRUE;
2629
2630 if (!emitInfo.EmitCondCodes) {
2631 emitInfo.EmitHighLevelInstructions = GL_TRUE;
2632 }
2633
2634 /* Check uniform/constant limits */
2635 if (prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
2636 maxUniforms = ctx->Const.FragmentProgram.MaxUniformComponents / 4;
2637 }
2638 else if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
2639 maxUniforms = ctx->Const.VertexProgram.MaxUniformComponents / 4;
2640 } else {
2641 assert(prog->Target == MESA_GEOMETRY_PROGRAM);
2642 maxUniforms = ctx->Const.GeometryProgram.MaxUniformComponents / 4;
2643 }
2644 if (prog->Parameters->NumParameters > maxUniforms) {
2645 slang_info_log_error(log, "Constant/uniform register limit exceeded "
2646 "(max=%u vec4)", maxUniforms);
2647
2648 return GL_FALSE;
2649 }
2650
2651 (void) emit(&emitInfo, n);
2652
2653 /* finish up by adding the END opcode to program */
2654 if (withEnd) {
2655 struct prog_instruction *inst;
2656 inst = new_instruction(&emitInfo, OPCODE_END);
2657 if (!inst) {
2658 return GL_FALSE;
2659 }
2660 }
2661
2662 _slang_resolve_subroutines(&emitInfo);
2663
2664 success = GL_TRUE;
2665
2666 #if 0
2667 printf("*********** End emit code (%u inst):\n", prog->NumInstructions);
2668 _mesa_print_program(prog);
2669 _mesa_print_program_parameters(ctx,prog);
2670 #endif
2671
2672 return success;
2673 }