glsl2: Move the common optimization passes to a helper function.
[mesa.git] / src / mesa / program / ir_to_mesa.cpp
1 /*
2 * Copyright (C) 2005-2007 Brian Paul All Rights Reserved.
3 * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
4 * Copyright © 2010 Intel Corporation
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
24 */
25
26 /**
27 * \file ir_to_mesa.cpp
28 *
29 * Translates the IR to ARB_fragment_program text if possible,
30 * printing the result
31 */
32
33 #include <stdio.h>
34 #include "main/compiler.h"
35 #include "ir.h"
36 #include "ir_visitor.h"
37 #include "ir_print_visitor.h"
38 #include "ir_expression_flattening.h"
39 #include "glsl_types.h"
40 #include "glsl_parser_extras.h"
41 #include "../glsl/program.h"
42 #include "ir_optimization.h"
43 #include "ast.h"
44
45 extern "C" {
46 #include "main/mtypes.h"
47 #include "main/shaderapi.h"
48 #include "main/shaderobj.h"
49 #include "main/uniforms.h"
50 #include "program/hash_table.h"
51 #include "program/prog_instruction.h"
52 #include "program/prog_optimize.h"
53 #include "program/prog_print.h"
54 #include "program/program.h"
55 #include "program/prog_uniform.h"
56 #include "program/prog_parameter.h"
57 }
58
59 static int swizzle_for_size(int size);
60
61 /**
62 * This struct is a corresponding struct to Mesa prog_src_register, with
63 * wider fields.
64 */
65 typedef struct ir_to_mesa_src_reg {
66 ir_to_mesa_src_reg(int file, int index, const glsl_type *type)
67 {
68 this->file = file;
69 this->index = index;
70 if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
71 this->swizzle = swizzle_for_size(type->vector_elements);
72 else
73 this->swizzle = SWIZZLE_XYZW;
74 this->negate = 0;
75 this->reladdr = NULL;
76 }
77
78 ir_to_mesa_src_reg()
79 {
80 this->file = PROGRAM_UNDEFINED;
81 }
82
83 int file; /**< PROGRAM_* from Mesa */
84 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
85 GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
86 int negate; /**< NEGATE_XYZW mask from mesa */
87 /** Register index should be offset by the integer in this reg. */
88 ir_to_mesa_src_reg *reladdr;
89 } ir_to_mesa_src_reg;
90
91 typedef struct ir_to_mesa_dst_reg {
92 int file; /**< PROGRAM_* from Mesa */
93 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
94 int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
95 GLuint cond_mask:4;
96 /** Register index should be offset by the integer in this reg. */
97 ir_to_mesa_src_reg *reladdr;
98 } ir_to_mesa_dst_reg;
99
100 extern ir_to_mesa_src_reg ir_to_mesa_undef;
101
102 class ir_to_mesa_instruction : public exec_node {
103 public:
104 enum prog_opcode op;
105 ir_to_mesa_dst_reg dst_reg;
106 ir_to_mesa_src_reg src_reg[3];
107 /** Pointer to the ir source this tree came from for debugging */
108 ir_instruction *ir;
109 GLboolean cond_update;
110 int sampler; /**< sampler index */
111 int tex_target; /**< One of TEXTURE_*_INDEX */
112 GLboolean tex_shadow;
113
114 class function_entry *function; /* Set on OPCODE_CAL or OPCODE_BGNSUB */
115 };
116
117 class variable_storage : public exec_node {
118 public:
119 variable_storage(ir_variable *var, int file, int index)
120 : file(file), index(index), var(var)
121 {
122 /* empty */
123 }
124
125 int file;
126 int index;
127 ir_variable *var; /* variable that maps to this, if any */
128 };
129
130 class function_entry : public exec_node {
131 public:
132 ir_function_signature *sig;
133
134 /**
135 * identifier of this function signature used by the program.
136 *
137 * At the point that Mesa instructions for function calls are
138 * generated, we don't know the address of the first instruction of
139 * the function body. So we make the BranchTarget that is called a
140 * small integer and rewrite them during set_branchtargets().
141 */
142 int sig_id;
143
144 /**
145 * Pointer to first instruction of the function body.
146 *
147 * Set during function body emits after main() is processed.
148 */
149 ir_to_mesa_instruction *bgn_inst;
150
151 /**
152 * Index of the first instruction of the function body in actual
153 * Mesa IR.
154 *
155 * Set after convertion from ir_to_mesa_instruction to prog_instruction.
156 */
157 int inst;
158
159 /** Storage for the return value. */
160 ir_to_mesa_src_reg return_reg;
161 };
162
163 class ir_to_mesa_visitor : public ir_visitor {
164 public:
165 ir_to_mesa_visitor();
166 ~ir_to_mesa_visitor();
167
168 function_entry *current_function;
169
170 GLcontext *ctx;
171 struct gl_program *prog;
172
173 int next_temp;
174
175 variable_storage *find_variable_storage(ir_variable *var);
176
177 function_entry *get_function_signature(ir_function_signature *sig);
178
179 ir_to_mesa_src_reg get_temp(const glsl_type *type);
180 void reladdr_to_temp(ir_instruction *ir,
181 ir_to_mesa_src_reg *reg, int *num_reladdr);
182
183 struct ir_to_mesa_src_reg src_reg_for_float(float val);
184
185 /**
186 * \name Visit methods
187 *
188 * As typical for the visitor pattern, there must be one \c visit method for
189 * each concrete subclass of \c ir_instruction. Virtual base classes within
190 * the hierarchy should not have \c visit methods.
191 */
192 /*@{*/
193 virtual void visit(ir_variable *);
194 virtual void visit(ir_loop *);
195 virtual void visit(ir_loop_jump *);
196 virtual void visit(ir_function_signature *);
197 virtual void visit(ir_function *);
198 virtual void visit(ir_expression *);
199 virtual void visit(ir_swizzle *);
200 virtual void visit(ir_dereference_variable *);
201 virtual void visit(ir_dereference_array *);
202 virtual void visit(ir_dereference_record *);
203 virtual void visit(ir_assignment *);
204 virtual void visit(ir_constant *);
205 virtual void visit(ir_call *);
206 virtual void visit(ir_return *);
207 virtual void visit(ir_discard *);
208 virtual void visit(ir_texture *);
209 virtual void visit(ir_if *);
210 /*@}*/
211
212 struct ir_to_mesa_src_reg result;
213
214 /** List of variable_storage */
215 exec_list variables;
216
217 /** List of function_entry */
218 exec_list function_signatures;
219 int next_signature_id;
220
221 /** List of ir_to_mesa_instruction */
222 exec_list instructions;
223
224 ir_to_mesa_instruction *ir_to_mesa_emit_op0(ir_instruction *ir,
225 enum prog_opcode op);
226
227 ir_to_mesa_instruction *ir_to_mesa_emit_op1(ir_instruction *ir,
228 enum prog_opcode op,
229 ir_to_mesa_dst_reg dst,
230 ir_to_mesa_src_reg src0);
231
232 ir_to_mesa_instruction *ir_to_mesa_emit_op2(ir_instruction *ir,
233 enum prog_opcode op,
234 ir_to_mesa_dst_reg dst,
235 ir_to_mesa_src_reg src0,
236 ir_to_mesa_src_reg src1);
237
238 ir_to_mesa_instruction *ir_to_mesa_emit_op3(ir_instruction *ir,
239 enum prog_opcode op,
240 ir_to_mesa_dst_reg dst,
241 ir_to_mesa_src_reg src0,
242 ir_to_mesa_src_reg src1,
243 ir_to_mesa_src_reg src2);
244
245 void ir_to_mesa_emit_scalar_op1(ir_instruction *ir,
246 enum prog_opcode op,
247 ir_to_mesa_dst_reg dst,
248 ir_to_mesa_src_reg src0);
249
250 void ir_to_mesa_emit_scalar_op2(ir_instruction *ir,
251 enum prog_opcode op,
252 ir_to_mesa_dst_reg dst,
253 ir_to_mesa_src_reg src0,
254 ir_to_mesa_src_reg src1);
255
256 GLboolean try_emit_mad(ir_expression *ir,
257 int mul_operand);
258
259 int add_uniform(const char *name,
260 const glsl_type *type,
261 ir_constant *constant);
262 void add_aggregate_uniform(ir_instruction *ir,
263 const char *name,
264 const struct glsl_type *type,
265 ir_constant *constant,
266 struct ir_to_mesa_dst_reg temp);
267
268 struct hash_table *sampler_map;
269
270 void set_sampler_location(ir_variable *sampler, int location);
271 int get_sampler_location(ir_variable *sampler);
272
273 void *mem_ctx;
274 };
275
276 ir_to_mesa_src_reg ir_to_mesa_undef = ir_to_mesa_src_reg(PROGRAM_UNDEFINED, 0, NULL);
277
278 ir_to_mesa_dst_reg ir_to_mesa_undef_dst = {
279 PROGRAM_UNDEFINED, 0, SWIZZLE_NOOP, COND_TR, NULL,
280 };
281
282 ir_to_mesa_dst_reg ir_to_mesa_address_reg = {
283 PROGRAM_ADDRESS, 0, WRITEMASK_X, COND_TR, NULL
284 };
285
286 static int swizzle_for_size(int size)
287 {
288 int size_swizzles[4] = {
289 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
290 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
291 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
292 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
293 };
294
295 return size_swizzles[size - 1];
296 }
297
298 ir_to_mesa_instruction *
299 ir_to_mesa_visitor::ir_to_mesa_emit_op3(ir_instruction *ir,
300 enum prog_opcode op,
301 ir_to_mesa_dst_reg dst,
302 ir_to_mesa_src_reg src0,
303 ir_to_mesa_src_reg src1,
304 ir_to_mesa_src_reg src2)
305 {
306 ir_to_mesa_instruction *inst = new(mem_ctx) ir_to_mesa_instruction();
307 int num_reladdr = 0;
308
309 /* If we have to do relative addressing, we want to load the ARL
310 * reg directly for one of the regs, and preload the other reladdr
311 * sources into temps.
312 */
313 num_reladdr += dst.reladdr != NULL;
314 num_reladdr += src0.reladdr != NULL;
315 num_reladdr += src1.reladdr != NULL;
316 num_reladdr += src2.reladdr != NULL;
317
318 reladdr_to_temp(ir, &src2, &num_reladdr);
319 reladdr_to_temp(ir, &src1, &num_reladdr);
320 reladdr_to_temp(ir, &src0, &num_reladdr);
321
322 if (dst.reladdr) {
323 ir_to_mesa_emit_op1(ir, OPCODE_ARL, ir_to_mesa_address_reg,
324 *dst.reladdr);
325
326 num_reladdr--;
327 }
328 assert(num_reladdr == 0);
329
330 inst->op = op;
331 inst->dst_reg = dst;
332 inst->src_reg[0] = src0;
333 inst->src_reg[1] = src1;
334 inst->src_reg[2] = src2;
335 inst->ir = ir;
336
337 inst->function = NULL;
338
339 this->instructions.push_tail(inst);
340
341 return inst;
342 }
343
344
345 ir_to_mesa_instruction *
346 ir_to_mesa_visitor::ir_to_mesa_emit_op2(ir_instruction *ir,
347 enum prog_opcode op,
348 ir_to_mesa_dst_reg dst,
349 ir_to_mesa_src_reg src0,
350 ir_to_mesa_src_reg src1)
351 {
352 return ir_to_mesa_emit_op3(ir, op, dst, src0, src1, ir_to_mesa_undef);
353 }
354
355 ir_to_mesa_instruction *
356 ir_to_mesa_visitor::ir_to_mesa_emit_op1(ir_instruction *ir,
357 enum prog_opcode op,
358 ir_to_mesa_dst_reg dst,
359 ir_to_mesa_src_reg src0)
360 {
361 assert(dst.writemask != 0);
362 return ir_to_mesa_emit_op3(ir, op, dst,
363 src0, ir_to_mesa_undef, ir_to_mesa_undef);
364 }
365
366 ir_to_mesa_instruction *
367 ir_to_mesa_visitor::ir_to_mesa_emit_op0(ir_instruction *ir,
368 enum prog_opcode op)
369 {
370 return ir_to_mesa_emit_op3(ir, op, ir_to_mesa_undef_dst,
371 ir_to_mesa_undef,
372 ir_to_mesa_undef,
373 ir_to_mesa_undef);
374 }
375
376 void
377 ir_to_mesa_visitor::set_sampler_location(ir_variable *sampler, int location)
378 {
379 if (this->sampler_map == NULL) {
380 this->sampler_map = hash_table_ctor(0, hash_table_pointer_hash,
381 hash_table_pointer_compare);
382 }
383
384 hash_table_insert(this->sampler_map, (void *)(uintptr_t)location, sampler);
385 }
386
387 int
388 ir_to_mesa_visitor::get_sampler_location(ir_variable *sampler)
389 {
390 void *result = hash_table_find(this->sampler_map, sampler);
391
392 return (int)(uintptr_t)result;
393 }
394
395 inline ir_to_mesa_dst_reg
396 ir_to_mesa_dst_reg_from_src(ir_to_mesa_src_reg reg)
397 {
398 ir_to_mesa_dst_reg dst_reg;
399
400 dst_reg.file = reg.file;
401 dst_reg.index = reg.index;
402 dst_reg.writemask = WRITEMASK_XYZW;
403 dst_reg.cond_mask = COND_TR;
404 dst_reg.reladdr = reg.reladdr;
405
406 return dst_reg;
407 }
408
409 inline ir_to_mesa_src_reg
410 ir_to_mesa_src_reg_from_dst(ir_to_mesa_dst_reg reg)
411 {
412 return ir_to_mesa_src_reg(reg.file, reg.index, NULL);
413 }
414
415 /**
416 * Emits Mesa scalar opcodes to produce unique answers across channels.
417 *
418 * Some Mesa opcodes are scalar-only, like ARB_fp/vp. The src X
419 * channel determines the result across all channels. So to do a vec4
420 * of this operation, we want to emit a scalar per source channel used
421 * to produce dest channels.
422 */
423 void
424 ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op2(ir_instruction *ir,
425 enum prog_opcode op,
426 ir_to_mesa_dst_reg dst,
427 ir_to_mesa_src_reg orig_src0,
428 ir_to_mesa_src_reg orig_src1)
429 {
430 int i, j;
431 int done_mask = ~dst.writemask;
432
433 /* Mesa RCP is a scalar operation splatting results to all channels,
434 * like ARB_fp/vp. So emit as many RCPs as necessary to cover our
435 * dst channels.
436 */
437 for (i = 0; i < 4; i++) {
438 GLuint this_mask = (1 << i);
439 ir_to_mesa_instruction *inst;
440 ir_to_mesa_src_reg src0 = orig_src0;
441 ir_to_mesa_src_reg src1 = orig_src1;
442
443 if (done_mask & this_mask)
444 continue;
445
446 GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
447 GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
448 for (j = i + 1; j < 4; j++) {
449 if (!(done_mask & (1 << j)) &&
450 GET_SWZ(src0.swizzle, j) == src0_swiz &&
451 GET_SWZ(src1.swizzle, j) == src1_swiz) {
452 this_mask |= (1 << j);
453 }
454 }
455 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
456 src0_swiz, src0_swiz);
457 src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
458 src1_swiz, src1_swiz);
459
460 inst = ir_to_mesa_emit_op2(ir, op,
461 dst,
462 src0,
463 src1);
464 inst->dst_reg.writemask = this_mask;
465 done_mask |= this_mask;
466 }
467 }
468
469 void
470 ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op1(ir_instruction *ir,
471 enum prog_opcode op,
472 ir_to_mesa_dst_reg dst,
473 ir_to_mesa_src_reg src0)
474 {
475 ir_to_mesa_src_reg undef = ir_to_mesa_undef;
476
477 undef.swizzle = SWIZZLE_XXXX;
478
479 ir_to_mesa_emit_scalar_op2(ir, op, dst, src0, undef);
480 }
481
482 struct ir_to_mesa_src_reg
483 ir_to_mesa_visitor::src_reg_for_float(float val)
484 {
485 ir_to_mesa_src_reg src_reg(PROGRAM_CONSTANT, -1, NULL);
486
487 src_reg.index = _mesa_add_unnamed_constant(this->prog->Parameters,
488 &val, 1, &src_reg.swizzle);
489
490 return src_reg;
491 }
492
493 static int
494 type_size(const struct glsl_type *type)
495 {
496 unsigned int i;
497 int size;
498
499 switch (type->base_type) {
500 case GLSL_TYPE_UINT:
501 case GLSL_TYPE_INT:
502 case GLSL_TYPE_FLOAT:
503 case GLSL_TYPE_BOOL:
504 if (type->is_matrix()) {
505 return type->matrix_columns;
506 } else {
507 /* Regardless of size of vector, it gets a vec4. This is bad
508 * packing for things like floats, but otherwise arrays become a
509 * mess. Hopefully a later pass over the code can pack scalars
510 * down if appropriate.
511 */
512 return 1;
513 }
514 case GLSL_TYPE_ARRAY:
515 return type_size(type->fields.array) * type->length;
516 case GLSL_TYPE_STRUCT:
517 size = 0;
518 for (i = 0; i < type->length; i++) {
519 size += type_size(type->fields.structure[i].type);
520 }
521 return size;
522 case GLSL_TYPE_SAMPLER:
523 /* Samplers take up no register space, since they're baked in at
524 * link time.
525 */
526 return 0;
527 default:
528 assert(0);
529 }
530 }
531
532 /**
533 * In the initial pass of codegen, we assign temporary numbers to
534 * intermediate results. (not SSA -- variable assignments will reuse
535 * storage). Actual register allocation for the Mesa VM occurs in a
536 * pass over the Mesa IR later.
537 */
538 ir_to_mesa_src_reg
539 ir_to_mesa_visitor::get_temp(const glsl_type *type)
540 {
541 ir_to_mesa_src_reg src_reg;
542 int swizzle[4];
543 int i;
544
545 src_reg.file = PROGRAM_TEMPORARY;
546 src_reg.index = next_temp;
547 src_reg.reladdr = NULL;
548 next_temp += type_size(type);
549
550 if (type->is_array() || type->is_record()) {
551 src_reg.swizzle = SWIZZLE_NOOP;
552 } else {
553 for (i = 0; i < type->vector_elements; i++)
554 swizzle[i] = i;
555 for (; i < 4; i++)
556 swizzle[i] = type->vector_elements - 1;
557 src_reg.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1],
558 swizzle[2], swizzle[3]);
559 }
560 src_reg.negate = 0;
561
562 return src_reg;
563 }
564
565 variable_storage *
566 ir_to_mesa_visitor::find_variable_storage(ir_variable *var)
567 {
568
569 variable_storage *entry;
570
571 foreach_iter(exec_list_iterator, iter, this->variables) {
572 entry = (variable_storage *)iter.get();
573
574 if (entry->var == var)
575 return entry;
576 }
577
578 return NULL;
579 }
580
581 void
582 ir_to_mesa_visitor::visit(ir_variable *ir)
583 {
584 if (strcmp(ir->name, "gl_FragCoord") == 0) {
585 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
586
587 fp->OriginUpperLeft = ir->origin_upper_left;
588 fp->PixelCenterInteger = ir->pixel_center_integer;
589 }
590 }
591
592 void
593 ir_to_mesa_visitor::visit(ir_loop *ir)
594 {
595 assert(!ir->from);
596 assert(!ir->to);
597 assert(!ir->increment);
598 assert(!ir->counter);
599
600 ir_to_mesa_emit_op0(NULL, OPCODE_BGNLOOP);
601 visit_exec_list(&ir->body_instructions, this);
602 ir_to_mesa_emit_op0(NULL, OPCODE_ENDLOOP);
603 }
604
605 void
606 ir_to_mesa_visitor::visit(ir_loop_jump *ir)
607 {
608 switch (ir->mode) {
609 case ir_loop_jump::jump_break:
610 ir_to_mesa_emit_op0(NULL, OPCODE_BRK);
611 break;
612 case ir_loop_jump::jump_continue:
613 ir_to_mesa_emit_op0(NULL, OPCODE_CONT);
614 break;
615 }
616 }
617
618
619 void
620 ir_to_mesa_visitor::visit(ir_function_signature *ir)
621 {
622 assert(0);
623 (void)ir;
624 }
625
626 void
627 ir_to_mesa_visitor::visit(ir_function *ir)
628 {
629 /* Ignore function bodies other than main() -- we shouldn't see calls to
630 * them since they should all be inlined before we get to ir_to_mesa.
631 */
632 if (strcmp(ir->name, "main") == 0) {
633 const ir_function_signature *sig;
634 exec_list empty;
635
636 sig = ir->matching_signature(&empty);
637
638 assert(sig);
639
640 foreach_iter(exec_list_iterator, iter, sig->body) {
641 ir_instruction *ir = (ir_instruction *)iter.get();
642
643 ir->accept(this);
644 }
645 }
646 }
647
648 GLboolean
649 ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
650 {
651 int nonmul_operand = 1 - mul_operand;
652 ir_to_mesa_src_reg a, b, c;
653
654 ir_expression *expr = ir->operands[mul_operand]->as_expression();
655 if (!expr || expr->operation != ir_binop_mul)
656 return false;
657
658 expr->operands[0]->accept(this);
659 a = this->result;
660 expr->operands[1]->accept(this);
661 b = this->result;
662 ir->operands[nonmul_operand]->accept(this);
663 c = this->result;
664
665 this->result = get_temp(ir->type);
666 ir_to_mesa_emit_op3(ir, OPCODE_MAD,
667 ir_to_mesa_dst_reg_from_src(this->result), a, b, c);
668
669 return true;
670 }
671
672 void
673 ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir,
674 ir_to_mesa_src_reg *reg, int *num_reladdr)
675 {
676 if (!reg->reladdr)
677 return;
678
679 ir_to_mesa_emit_op1(ir, OPCODE_ARL, ir_to_mesa_address_reg, *reg->reladdr);
680
681 if (*num_reladdr != 1) {
682 ir_to_mesa_src_reg temp = get_temp(glsl_type::vec4_type);
683
684 ir_to_mesa_emit_op1(ir, OPCODE_MOV,
685 ir_to_mesa_dst_reg_from_src(temp), *reg);
686 *reg = temp;
687 }
688
689 (*num_reladdr)--;
690 }
691
692 void
693 ir_to_mesa_visitor::visit(ir_expression *ir)
694 {
695 unsigned int operand;
696 struct ir_to_mesa_src_reg op[2];
697 struct ir_to_mesa_src_reg result_src;
698 struct ir_to_mesa_dst_reg result_dst;
699 const glsl_type *vec4_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 4, 1);
700 const glsl_type *vec3_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 3, 1);
701 const glsl_type *vec2_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 2, 1);
702
703 /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c)
704 */
705 if (ir->operation == ir_binop_add) {
706 if (try_emit_mad(ir, 1))
707 return;
708 if (try_emit_mad(ir, 0))
709 return;
710 }
711
712 for (operand = 0; operand < ir->get_num_operands(); operand++) {
713 this->result.file = PROGRAM_UNDEFINED;
714 ir->operands[operand]->accept(this);
715 if (this->result.file == PROGRAM_UNDEFINED) {
716 ir_print_visitor v;
717 printf("Failed to get tree for expression operand:\n");
718 ir->operands[operand]->accept(&v);
719 exit(1);
720 }
721 op[operand] = this->result;
722
723 /* Matrix expression operands should have been broken down to vector
724 * operations already.
725 */
726 assert(!ir->operands[operand]->type->is_matrix());
727 }
728
729 this->result.file = PROGRAM_UNDEFINED;
730
731 /* Storage for our result. Ideally for an assignment we'd be using
732 * the actual storage for the result here, instead.
733 */
734 result_src = get_temp(ir->type);
735 /* convenience for the emit functions below. */
736 result_dst = ir_to_mesa_dst_reg_from_src(result_src);
737 /* Limit writes to the channels that will be used by result_src later.
738 * This does limit this temp's use as a temporary for multi-instruction
739 * sequences.
740 */
741 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
742
743 switch (ir->operation) {
744 case ir_unop_logic_not:
745 ir_to_mesa_emit_op2(ir, OPCODE_SEQ, result_dst,
746 op[0], src_reg_for_float(0.0));
747 break;
748 case ir_unop_neg:
749 op[0].negate = ~op[0].negate;
750 result_src = op[0];
751 break;
752 case ir_unop_abs:
753 ir_to_mesa_emit_op1(ir, OPCODE_ABS, result_dst, op[0]);
754 break;
755 case ir_unop_sign:
756 ir_to_mesa_emit_op1(ir, OPCODE_SSG, result_dst, op[0]);
757 break;
758 case ir_unop_rcp:
759 ir_to_mesa_emit_scalar_op1(ir, OPCODE_RCP, result_dst, op[0]);
760 break;
761
762 case ir_unop_exp2:
763 ir_to_mesa_emit_scalar_op1(ir, OPCODE_EX2, result_dst, op[0]);
764 break;
765 case ir_unop_exp:
766 case ir_unop_log:
767 assert(!"not reached: should be handled by ir_explog_to_explog2");
768 break;
769 case ir_unop_log2:
770 ir_to_mesa_emit_scalar_op1(ir, OPCODE_LG2, result_dst, op[0]);
771 break;
772 case ir_unop_sin:
773 ir_to_mesa_emit_scalar_op1(ir, OPCODE_SIN, result_dst, op[0]);
774 break;
775 case ir_unop_cos:
776 ir_to_mesa_emit_scalar_op1(ir, OPCODE_COS, result_dst, op[0]);
777 break;
778
779 case ir_unop_dFdx:
780 ir_to_mesa_emit_op1(ir, OPCODE_DDX, result_dst, op[0]);
781 break;
782 case ir_unop_dFdy:
783 ir_to_mesa_emit_op1(ir, OPCODE_DDY, result_dst, op[0]);
784 break;
785
786 case ir_binop_add:
787 ir_to_mesa_emit_op2(ir, OPCODE_ADD, result_dst, op[0], op[1]);
788 break;
789 case ir_binop_sub:
790 ir_to_mesa_emit_op2(ir, OPCODE_SUB, result_dst, op[0], op[1]);
791 break;
792
793 case ir_binop_mul:
794 ir_to_mesa_emit_op2(ir, OPCODE_MUL, result_dst, op[0], op[1]);
795 break;
796 case ir_binop_div:
797 assert(!"not reached: should be handled by ir_div_to_mul_rcp");
798 case ir_binop_mod:
799 assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
800 break;
801
802 case ir_binop_less:
803 ir_to_mesa_emit_op2(ir, OPCODE_SLT, result_dst, op[0], op[1]);
804 break;
805 case ir_binop_greater:
806 ir_to_mesa_emit_op2(ir, OPCODE_SGT, result_dst, op[0], op[1]);
807 break;
808 case ir_binop_lequal:
809 ir_to_mesa_emit_op2(ir, OPCODE_SLE, result_dst, op[0], op[1]);
810 break;
811 case ir_binop_gequal:
812 ir_to_mesa_emit_op2(ir, OPCODE_SGE, result_dst, op[0], op[1]);
813 break;
814 case ir_binop_equal:
815 ir_to_mesa_emit_op2(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
816 break;
817 case ir_binop_logic_xor:
818 case ir_binop_nequal:
819 ir_to_mesa_emit_op2(ir, OPCODE_SNE, result_dst, op[0], op[1]);
820 break;
821
822 case ir_binop_logic_or:
823 /* This could be a saturated add and skip the SNE. */
824 ir_to_mesa_emit_op2(ir, OPCODE_ADD,
825 result_dst,
826 op[0], op[1]);
827
828 ir_to_mesa_emit_op2(ir, OPCODE_SNE,
829 result_dst,
830 result_src, src_reg_for_float(0.0));
831 break;
832
833 case ir_binop_logic_and:
834 /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
835 ir_to_mesa_emit_op2(ir, OPCODE_MUL,
836 result_dst,
837 op[0], op[1]);
838 break;
839
840 case ir_binop_dot:
841 if (ir->operands[0]->type == vec4_type) {
842 assert(ir->operands[1]->type == vec4_type);
843 ir_to_mesa_emit_op2(ir, OPCODE_DP4,
844 result_dst,
845 op[0], op[1]);
846 } else if (ir->operands[0]->type == vec3_type) {
847 assert(ir->operands[1]->type == vec3_type);
848 ir_to_mesa_emit_op2(ir, OPCODE_DP3,
849 result_dst,
850 op[0], op[1]);
851 } else if (ir->operands[0]->type == vec2_type) {
852 assert(ir->operands[1]->type == vec2_type);
853 ir_to_mesa_emit_op2(ir, OPCODE_DP2,
854 result_dst,
855 op[0], op[1]);
856 }
857 break;
858
859 case ir_binop_cross:
860 ir_to_mesa_emit_op2(ir, OPCODE_XPD, result_dst, op[0], op[1]);
861 break;
862
863 case ir_unop_sqrt:
864 ir_to_mesa_emit_scalar_op1(ir, OPCODE_RSQ, result_dst, op[0]);
865 ir_to_mesa_emit_scalar_op1(ir, OPCODE_RCP, result_dst, result_src);
866 /* For incoming channels < 0, set the result to 0. */
867 ir_to_mesa_emit_op3(ir, OPCODE_CMP, result_dst,
868 op[0], src_reg_for_float(0.0), result_src);
869 break;
870 case ir_unop_rsq:
871 ir_to_mesa_emit_scalar_op1(ir, OPCODE_RSQ, result_dst, op[0]);
872 break;
873 case ir_unop_i2f:
874 case ir_unop_b2f:
875 case ir_unop_b2i:
876 /* Mesa IR lacks types, ints are stored as truncated floats. */
877 result_src = op[0];
878 break;
879 case ir_unop_f2i:
880 ir_to_mesa_emit_op1(ir, OPCODE_TRUNC, result_dst, op[0]);
881 break;
882 case ir_unop_f2b:
883 case ir_unop_i2b:
884 ir_to_mesa_emit_op2(ir, OPCODE_SNE, result_dst,
885 result_src, src_reg_for_float(0.0));
886 break;
887 case ir_unop_trunc:
888 ir_to_mesa_emit_op1(ir, OPCODE_TRUNC, result_dst, op[0]);
889 break;
890 case ir_unop_ceil:
891 op[0].negate = ~op[0].negate;
892 ir_to_mesa_emit_op1(ir, OPCODE_FLR, result_dst, op[0]);
893 result_src.negate = ~result_src.negate;
894 break;
895 case ir_unop_floor:
896 ir_to_mesa_emit_op1(ir, OPCODE_FLR, result_dst, op[0]);
897 break;
898 case ir_unop_fract:
899 ir_to_mesa_emit_op1(ir, OPCODE_FRC, result_dst, op[0]);
900 break;
901
902 case ir_binop_min:
903 ir_to_mesa_emit_op2(ir, OPCODE_MIN, result_dst, op[0], op[1]);
904 break;
905 case ir_binop_max:
906 ir_to_mesa_emit_op2(ir, OPCODE_MAX, result_dst, op[0], op[1]);
907 break;
908 case ir_binop_pow:
909 ir_to_mesa_emit_scalar_op2(ir, OPCODE_POW, result_dst, op[0], op[1]);
910 break;
911
912 case ir_unop_bit_not:
913 case ir_unop_u2f:
914 case ir_binop_lshift:
915 case ir_binop_rshift:
916 case ir_binop_bit_and:
917 case ir_binop_bit_xor:
918 case ir_binop_bit_or:
919 assert(!"GLSL 1.30 features unsupported");
920 break;
921 }
922
923 this->result = result_src;
924 }
925
926
927 void
928 ir_to_mesa_visitor::visit(ir_swizzle *ir)
929 {
930 ir_to_mesa_src_reg src_reg;
931 int i;
932 int swizzle[4];
933
934 /* Note that this is only swizzles in expressions, not those on the left
935 * hand side of an assignment, which do write masking. See ir_assignment
936 * for that.
937 */
938
939 ir->val->accept(this);
940 src_reg = this->result;
941 assert(src_reg.file != PROGRAM_UNDEFINED);
942
943 for (i = 0; i < 4; i++) {
944 if (i < ir->type->vector_elements) {
945 switch (i) {
946 case 0:
947 swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.x);
948 break;
949 case 1:
950 swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.y);
951 break;
952 case 2:
953 swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.z);
954 break;
955 case 3:
956 swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.w);
957 break;
958 }
959 } else {
960 /* If the type is smaller than a vec4, replicate the last
961 * channel out.
962 */
963 swizzle[i] = swizzle[ir->type->vector_elements - 1];
964 }
965 }
966
967 src_reg.swizzle = MAKE_SWIZZLE4(swizzle[0],
968 swizzle[1],
969 swizzle[2],
970 swizzle[3]);
971
972 this->result = src_reg;
973 }
974
975 static const struct {
976 const char *name;
977 const char *field;
978 int tokens[STATE_LENGTH];
979 int swizzle;
980 bool array_indexed;
981 } statevars[] = {
982 {"gl_DepthRange", "near",
983 {STATE_DEPTH_RANGE, 0, 0}, SWIZZLE_XXXX},
984 {"gl_DepthRange", "far",
985 {STATE_DEPTH_RANGE, 0, 0}, SWIZZLE_YYYY},
986 {"gl_DepthRange", "diff",
987 {STATE_DEPTH_RANGE, 0, 0}, SWIZZLE_ZZZZ},
988
989 {"gl_ClipPlane", NULL,
990 {STATE_CLIPPLANE, 0, 0}, SWIZZLE_XYZW, true}
991 ,
992 {"gl_Point", "size",
993 {STATE_POINT_SIZE}, SWIZZLE_XXXX},
994 {"gl_Point", "sizeMin",
995 {STATE_POINT_SIZE}, SWIZZLE_YYYY},
996 {"gl_Point", "sizeMax",
997 {STATE_POINT_SIZE}, SWIZZLE_ZZZZ},
998 {"gl_Point", "fadeThresholdSize",
999 {STATE_POINT_SIZE}, SWIZZLE_WWWW},
1000 {"gl_Point", "distanceConstantAttenuation",
1001 {STATE_POINT_ATTENUATION}, SWIZZLE_XXXX},
1002 {"gl_Point", "distanceLinearAttenuation",
1003 {STATE_POINT_ATTENUATION}, SWIZZLE_YYYY},
1004 {"gl_Point", "distanceQuadraticAttenuation",
1005 {STATE_POINT_ATTENUATION}, SWIZZLE_ZZZZ},
1006
1007 {"gl_FrontMaterial", "emission",
1008 {STATE_MATERIAL, 0, STATE_EMISSION}, SWIZZLE_XYZW},
1009 {"gl_FrontMaterial", "ambient",
1010 {STATE_MATERIAL, 0, STATE_AMBIENT}, SWIZZLE_XYZW},
1011 {"gl_FrontMaterial", "diffuse",
1012 {STATE_MATERIAL, 0, STATE_DIFFUSE}, SWIZZLE_XYZW},
1013 {"gl_FrontMaterial", "specular",
1014 {STATE_MATERIAL, 0, STATE_SPECULAR}, SWIZZLE_XYZW},
1015 {"gl_FrontMaterial", "shininess",
1016 {STATE_MATERIAL, 0, STATE_SHININESS}, SWIZZLE_XXXX},
1017
1018 {"gl_BackMaterial", "emission",
1019 {STATE_MATERIAL, 1, STATE_EMISSION}, SWIZZLE_XYZW},
1020 {"gl_BackMaterial", "ambient",
1021 {STATE_MATERIAL, 1, STATE_AMBIENT}, SWIZZLE_XYZW},
1022 {"gl_BackMaterial", "diffuse",
1023 {STATE_MATERIAL, 1, STATE_DIFFUSE}, SWIZZLE_XYZW},
1024 {"gl_BackMaterial", "specular",
1025 {STATE_MATERIAL, 1, STATE_SPECULAR}, SWIZZLE_XYZW},
1026 {"gl_BackMaterial", "shininess",
1027 {STATE_MATERIAL, 1, STATE_SHININESS}, SWIZZLE_XXXX},
1028
1029 {"gl_LightSource", "ambient",
1030 {STATE_LIGHT, 0, STATE_AMBIENT}, SWIZZLE_XYZW, true},
1031 {"gl_LightSource", "diffuse",
1032 {STATE_LIGHT, 0, STATE_DIFFUSE}, SWIZZLE_XYZW, true},
1033 {"gl_LightSource", "specular",
1034 {STATE_LIGHT, 0, STATE_SPECULAR}, SWIZZLE_XYZW, true},
1035 {"gl_LightSource", "position",
1036 {STATE_LIGHT, 0, STATE_POSITION}, SWIZZLE_XYZW, true},
1037 {"gl_LightSource", "halfVector",
1038 {STATE_LIGHT, 0, STATE_HALF_VECTOR}, SWIZZLE_XYZW, true},
1039 {"gl_LightSource", "spotDirection",
1040 {STATE_LIGHT, 0, STATE_SPOT_DIRECTION}, SWIZZLE_XYZW, true},
1041 {"gl_LightSource", "spotCosCutoff",
1042 {STATE_LIGHT, 0, STATE_SPOT_DIRECTION}, SWIZZLE_WWWW, true},
1043 {"gl_LightSource", "spotCutoff",
1044 {STATE_LIGHT, 0, STATE_SPOT_CUTOFF}, SWIZZLE_XXXX, true},
1045 {"gl_LightSource", "spotExponent",
1046 {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_WWWW, true},
1047 {"gl_LightSource", "constantAttenuation",
1048 {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_XXXX, true},
1049 {"gl_LightSource", "linearAttenuation",
1050 {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_YYYY, true},
1051 {"gl_LightSource", "quadraticAttenuation",
1052 {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_ZZZZ, true},
1053
1054 {"gl_LightModel", NULL,
1055 {STATE_LIGHTMODEL_AMBIENT, 0}, SWIZZLE_XYZW},
1056
1057 {"gl_FrontLightModelProduct", NULL,
1058 {STATE_LIGHTMODEL_SCENECOLOR, 0}, SWIZZLE_XYZW},
1059 {"gl_BackLightModelProduct", NULL,
1060 {STATE_LIGHTMODEL_SCENECOLOR, 1}, SWIZZLE_XYZW},
1061
1062 {"gl_FrontLightProduct", "ambient",
1063 {STATE_LIGHTPROD, 0, 0, STATE_AMBIENT}, SWIZZLE_XYZW, true},
1064 {"gl_FrontLightProduct", "diffuse",
1065 {STATE_LIGHTPROD, 0, 0, STATE_DIFFUSE}, SWIZZLE_XYZW, true},
1066 {"gl_FrontLightProduct", "specular",
1067 {STATE_LIGHTPROD, 0, 0, STATE_SPECULAR}, SWIZZLE_XYZW, true},
1068
1069 {"gl_BackLightProduct", "ambient",
1070 {STATE_LIGHTPROD, 0, 1, STATE_AMBIENT}, SWIZZLE_XYZW, true},
1071 {"gl_BackLightProduct", "diffuse",
1072 {STATE_LIGHTPROD, 0, 1, STATE_DIFFUSE}, SWIZZLE_XYZW, true},
1073 {"gl_BackLightProduct", "specular",
1074 {STATE_LIGHTPROD, 0, 1, STATE_SPECULAR}, SWIZZLE_XYZW, true},
1075
1076 {"gl_TextureEnvColor", "ambient",
1077 {STATE_TEXENV_COLOR, 0}, SWIZZLE_XYZW, true},
1078
1079 {"gl_EyePlaneS", NULL,
1080 {STATE_TEXGEN, 0, STATE_TEXGEN_EYE_S}, SWIZZLE_XYZW, true},
1081 {"gl_EyePlaneT", NULL,
1082 {STATE_TEXGEN, 0, STATE_TEXGEN_EYE_T}, SWIZZLE_XYZW, true},
1083 {"gl_EyePlaneR", NULL,
1084 {STATE_TEXGEN, 0, STATE_TEXGEN_EYE_R}, SWIZZLE_XYZW, true},
1085 {"gl_EyePlaneQ", NULL,
1086 {STATE_TEXGEN, 0, STATE_TEXGEN_EYE_Q}, SWIZZLE_XYZW, true},
1087
1088 {"gl_ObjectPlaneS", NULL,
1089 {STATE_TEXGEN, 0, STATE_TEXGEN_OBJECT_S}, SWIZZLE_XYZW, true},
1090 {"gl_ObjectPlaneT", NULL,
1091 {STATE_TEXGEN, 0, STATE_TEXGEN_OBJECT_T}, SWIZZLE_XYZW, true},
1092 {"gl_ObjectPlaneR", NULL,
1093 {STATE_TEXGEN, 0, STATE_TEXGEN_OBJECT_R}, SWIZZLE_XYZW, true},
1094 {"gl_ObjectPlaneQ", NULL,
1095 {STATE_TEXGEN, 0, STATE_TEXGEN_OBJECT_Q}, SWIZZLE_XYZW, true},
1096
1097 {"gl_Fog", "color",
1098 {STATE_FOG_COLOR}, SWIZZLE_XYZW},
1099 {"gl_Fog", "density",
1100 {STATE_FOG_PARAMS}, SWIZZLE_XXXX},
1101 {"gl_Fog", "start",
1102 {STATE_FOG_PARAMS}, SWIZZLE_YYYY},
1103 {"gl_Fog", "end",
1104 {STATE_FOG_PARAMS}, SWIZZLE_ZZZZ},
1105 {"gl_Fog", "scale",
1106 {STATE_FOG_PARAMS}, SWIZZLE_WWWW},
1107 };
1108
1109 static ir_to_mesa_src_reg
1110 get_builtin_uniform_reg(struct gl_program *prog,
1111 const char *name, int array_index, const char *field)
1112 {
1113 unsigned int i;
1114 ir_to_mesa_src_reg src_reg;
1115 int tokens[STATE_LENGTH];
1116
1117 for (i = 0; i < Elements(statevars); i++) {
1118 if (strcmp(statevars[i].name, name) != 0)
1119 continue;
1120 if (!field && statevars[i].field) {
1121 assert(!"FINISHME: whole-structure state var dereference");
1122 }
1123 if (field && strcmp(statevars[i].field, field) != 0)
1124 continue;
1125 break;
1126 }
1127
1128 if (i == Elements(statevars)) {
1129 printf("builtin uniform %s%s%s not found\n",
1130 name,
1131 field ? "." : "",
1132 field ? field : "");
1133 abort();
1134 }
1135
1136 memcpy(&tokens, statevars[i].tokens, sizeof(tokens));
1137 if (statevars[i].array_indexed)
1138 tokens[1] = array_index;
1139
1140 src_reg.file = PROGRAM_STATE_VAR;
1141 src_reg.index = _mesa_add_state_reference(prog->Parameters,
1142 (gl_state_index *)tokens);
1143 src_reg.swizzle = statevars[i].swizzle;
1144 src_reg.negate = 0;
1145 src_reg.reladdr = false;
1146
1147 return src_reg;
1148 }
1149
1150 static int
1151 add_matrix_ref(struct gl_program *prog, int *tokens)
1152 {
1153 int base_pos = -1;
1154 int i;
1155
1156 /* Add a ref for each column. It looks like the reason we do
1157 * it this way is that _mesa_add_state_reference doesn't work
1158 * for things that aren't vec4s, so the tokens[2]/tokens[3]
1159 * range has to be equal.
1160 */
1161 for (i = 0; i < 4; i++) {
1162 tokens[2] = i;
1163 tokens[3] = i;
1164 int pos = _mesa_add_state_reference(prog->Parameters,
1165 (gl_state_index *)tokens);
1166 if (base_pos == -1)
1167 base_pos = pos;
1168 else
1169 assert(base_pos + i == pos);
1170 }
1171
1172 return base_pos;
1173 }
1174
1175 static variable_storage *
1176 get_builtin_matrix_ref(void *mem_ctx, struct gl_program *prog, ir_variable *var,
1177 ir_rvalue *array_index)
1178 {
1179 /*
1180 * NOTE: The ARB_vertex_program extension specified that matrices get
1181 * loaded in registers in row-major order. With GLSL, we want column-
1182 * major order. So, we need to transpose all matrices here...
1183 */
1184 static const struct {
1185 const char *name;
1186 int matrix;
1187 int modifier;
1188 } matrices[] = {
1189 { "gl_ModelViewMatrix", STATE_MODELVIEW_MATRIX, STATE_MATRIX_TRANSPOSE },
1190 { "gl_ModelViewMatrixInverse", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVTRANS },
1191 { "gl_ModelViewMatrixTranspose", STATE_MODELVIEW_MATRIX, 0 },
1192 { "gl_ModelViewMatrixInverseTranspose", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVERSE },
1193
1194 { "gl_ProjectionMatrix", STATE_PROJECTION_MATRIX, STATE_MATRIX_TRANSPOSE },
1195 { "gl_ProjectionMatrixInverse", STATE_PROJECTION_MATRIX, STATE_MATRIX_INVTRANS },
1196 { "gl_ProjectionMatrixTranspose", STATE_PROJECTION_MATRIX, 0 },
1197 { "gl_ProjectionMatrixInverseTranspose", STATE_PROJECTION_MATRIX, STATE_MATRIX_INVERSE },
1198
1199 { "gl_ModelViewProjectionMatrix", STATE_MVP_MATRIX, STATE_MATRIX_TRANSPOSE },
1200 { "gl_ModelViewProjectionMatrixInverse", STATE_MVP_MATRIX, STATE_MATRIX_INVTRANS },
1201 { "gl_ModelViewProjectionMatrixTranspose", STATE_MVP_MATRIX, 0 },
1202 { "gl_ModelViewProjectionMatrixInverseTranspose", STATE_MVP_MATRIX, STATE_MATRIX_INVERSE },
1203
1204 { "gl_TextureMatrix", STATE_TEXTURE_MATRIX, STATE_MATRIX_TRANSPOSE },
1205 { "gl_TextureMatrixInverse", STATE_TEXTURE_MATRIX, STATE_MATRIX_INVTRANS },
1206 { "gl_TextureMatrixTranspose", STATE_TEXTURE_MATRIX, 0 },
1207 { "gl_TextureMatrixInverseTranspose", STATE_TEXTURE_MATRIX, STATE_MATRIX_INVERSE },
1208
1209 { "gl_NormalMatrix", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVERSE },
1210
1211 };
1212 unsigned int i;
1213 variable_storage *entry;
1214
1215 /* C++ gets angry when we try to use an int as a gl_state_index, so we use
1216 * ints for gl_state_index. Make sure they're compatible.
1217 */
1218 assert(sizeof(gl_state_index) == sizeof(int));
1219
1220 for (i = 0; i < Elements(matrices); i++) {
1221 if (strcmp(var->name, matrices[i].name) == 0) {
1222 int tokens[STATE_LENGTH];
1223 int base_pos = -1;
1224
1225 tokens[0] = matrices[i].matrix;
1226 tokens[4] = matrices[i].modifier;
1227 if (matrices[i].matrix == STATE_TEXTURE_MATRIX) {
1228 ir_constant *index = array_index->constant_expression_value();
1229 if (index) {
1230 tokens[1] = index->value.i[0];
1231 base_pos = add_matrix_ref(prog, tokens);
1232 } else {
1233 for (i = 0; i < var->type->length; i++) {
1234 tokens[1] = i;
1235 int pos = add_matrix_ref(prog, tokens);
1236 if (base_pos == -1)
1237 base_pos = pos;
1238 else
1239 assert(base_pos + (int)i * 4 == pos);
1240 }
1241 }
1242 } else {
1243 tokens[1] = 0; /* unused array index */
1244 base_pos = add_matrix_ref(prog, tokens);
1245 }
1246 tokens[4] = matrices[i].modifier;
1247
1248 entry = new(mem_ctx) variable_storage(var,
1249 PROGRAM_STATE_VAR,
1250 base_pos);
1251
1252 return entry;
1253 }
1254 }
1255
1256 return NULL;
1257 }
1258
1259 int
1260 ir_to_mesa_visitor::add_uniform(const char *name,
1261 const glsl_type *type,
1262 ir_constant *constant)
1263 {
1264 int len;
1265
1266 if (type->is_vector() ||
1267 type->is_scalar()) {
1268 len = type->vector_elements;
1269 } else {
1270 len = type_size(type) * 4;
1271 }
1272
1273 float *values = NULL;
1274 if (constant && type->is_array()) {
1275 values = (float *)malloc(type->length * 4 * sizeof(float));
1276
1277 assert(type->fields.array->is_scalar() ||
1278 type->fields.array->is_vector() ||
1279 !"FINISHME: uniform array initializers for non-vector");
1280
1281 for (unsigned int i = 0; i < type->length; i++) {
1282 ir_constant *element = constant->array_elements[i];
1283 unsigned int c;
1284
1285 for (c = 0; c < type->fields.array->vector_elements; c++) {
1286 switch (type->fields.array->base_type) {
1287 case GLSL_TYPE_FLOAT:
1288 values[4 * i + c] = element->value.f[c];
1289 break;
1290 case GLSL_TYPE_INT:
1291 values[4 * i + c] = element->value.i[c];
1292 break;
1293 case GLSL_TYPE_UINT:
1294 values[4 * i + c] = element->value.u[c];
1295 break;
1296 case GLSL_TYPE_BOOL:
1297 values[4 * i + c] = element->value.b[c];
1298 break;
1299 default:
1300 assert(!"not reached");
1301 }
1302 }
1303 }
1304 } else if (constant) {
1305 values = (float *)malloc(16 * sizeof(float));
1306 for (unsigned int i = 0; i < type->components(); i++) {
1307 switch (type->base_type) {
1308 case GLSL_TYPE_FLOAT:
1309 values[i] = constant->value.f[i];
1310 break;
1311 case GLSL_TYPE_INT:
1312 values[i] = constant->value.i[i];
1313 break;
1314 case GLSL_TYPE_UINT:
1315 values[i] = constant->value.u[i];
1316 break;
1317 case GLSL_TYPE_BOOL:
1318 values[i] = constant->value.b[i];
1319 break;
1320 default:
1321 assert(!"not reached");
1322 }
1323 }
1324 }
1325
1326 int loc = _mesa_add_uniform(this->prog->Parameters,
1327 name,
1328 len,
1329 type->gl_type,
1330 values);
1331 free(values);
1332
1333 return loc;
1334 }
1335
1336 /* Recursively add all the members of the aggregate uniform as uniform names
1337 * to Mesa, moving those uniforms to our structured temporary.
1338 */
1339 void
1340 ir_to_mesa_visitor::add_aggregate_uniform(ir_instruction *ir,
1341 const char *name,
1342 const struct glsl_type *type,
1343 ir_constant *constant,
1344 struct ir_to_mesa_dst_reg temp)
1345 {
1346 int loc;
1347
1348 if (type->is_record()) {
1349 void *mem_ctx = talloc_new(NULL);
1350 ir_constant *field_constant = NULL;
1351
1352 if (constant)
1353 field_constant = (ir_constant *)constant->components.get_head();
1354
1355 for (unsigned int i = 0; i < type->length; i++) {
1356 const glsl_type *field_type = type->fields.structure[i].type;
1357
1358 add_aggregate_uniform(ir,
1359 talloc_asprintf(mem_ctx, "%s.%s", name,
1360 type->fields.structure[i].name),
1361 field_type, field_constant, temp);
1362 temp.index += type_size(field_type);
1363
1364 if (constant)
1365 field_constant = (ir_constant *)field_constant->next;
1366 }
1367
1368 talloc_free(mem_ctx);
1369
1370 return;
1371 }
1372
1373 assert(type->is_vector() || type->is_scalar() || !"FINISHME: other types");
1374
1375 loc = add_uniform(name, type, constant);
1376
1377 ir_to_mesa_src_reg uniform(PROGRAM_UNIFORM, loc, type);
1378
1379 for (int i = 0; i < type_size(type); i++) {
1380 ir_to_mesa_emit_op1(ir, OPCODE_MOV, temp, uniform);
1381 temp.index++;
1382 uniform.index++;
1383 }
1384 }
1385
1386
1387 void
1388 ir_to_mesa_visitor::visit(ir_dereference_variable *ir)
1389 {
1390 variable_storage *entry = find_variable_storage(ir->var);
1391 unsigned int loc;
1392
1393 if (!entry) {
1394 switch (ir->var->mode) {
1395 case ir_var_uniform:
1396 entry = get_builtin_matrix_ref(this->mem_ctx, this->prog, ir->var,
1397 NULL);
1398 if (entry)
1399 break;
1400
1401 /* FINISHME: Fix up uniform name for arrays and things */
1402 if (ir->var->type->base_type == GLSL_TYPE_SAMPLER ||
1403 (ir->var->type->base_type == GLSL_TYPE_ARRAY &&
1404 ir->var->type->fields.array->base_type == GLSL_TYPE_SAMPLER)) {
1405 int array_length;
1406
1407 if (ir->var->type->base_type == GLSL_TYPE_ARRAY)
1408 array_length = ir->var->type->length;
1409 else
1410 array_length = 1;
1411 int sampler = _mesa_add_sampler(this->prog->Parameters,
1412 ir->var->name,
1413 ir->var->type->gl_type,
1414 array_length);
1415 set_sampler_location(ir->var, sampler);
1416
1417 entry = new(mem_ctx) variable_storage(ir->var, PROGRAM_SAMPLER,
1418 sampler);
1419 this->variables.push_tail(entry);
1420 break;
1421 }
1422
1423 assert(ir->var->type->gl_type != 0 &&
1424 ir->var->type->gl_type != GL_INVALID_ENUM);
1425
1426 /* Oh, the joy of aggregate types in Mesa. Like constants,
1427 * we can only really do vec4s. So, make a temp, chop the
1428 * aggregate up into vec4s, and move those vec4s to the temp.
1429 */
1430 if (ir->var->type->is_record()) {
1431 ir_to_mesa_src_reg temp = get_temp(ir->var->type);
1432
1433 entry = new(mem_ctx) variable_storage(ir->var,
1434 temp.file,
1435 temp.index);
1436 this->variables.push_tail(entry);
1437
1438 add_aggregate_uniform(ir->var, ir->var->name, ir->var->type,
1439 ir->var->constant_value,
1440 ir_to_mesa_dst_reg_from_src(temp));
1441 break;
1442 }
1443
1444 loc = add_uniform(ir->var->name,
1445 ir->var->type,
1446 ir->var->constant_value);
1447
1448 /* Always mark the uniform used at this point. If it isn't
1449 * used, dead code elimination should have nuked the decl already.
1450 */
1451 this->prog->Parameters->Parameters[loc].Used = GL_TRUE;
1452
1453 entry = new(mem_ctx) variable_storage(ir->var, PROGRAM_UNIFORM, loc);
1454 this->variables.push_tail(entry);
1455 break;
1456 case ir_var_in:
1457 case ir_var_out:
1458 case ir_var_inout:
1459 /* The linker assigns locations for varyings and attributes,
1460 * including deprecated builtins (like gl_Color), user-assign
1461 * generic attributes (glBindVertexLocation), and
1462 * user-defined varyings.
1463 *
1464 * FINISHME: We would hit this path for function arguments. Fix!
1465 */
1466 assert(ir->var->location != -1);
1467 if (ir->var->mode == ir_var_in ||
1468 ir->var->mode == ir_var_inout) {
1469 entry = new(mem_ctx) variable_storage(ir->var,
1470 PROGRAM_INPUT,
1471 ir->var->location);
1472
1473 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
1474 ir->var->location >= VERT_ATTRIB_GENERIC0) {
1475 _mesa_add_attribute(prog->Attributes,
1476 ir->var->name,
1477 _mesa_sizeof_glsl_type(ir->var->type->gl_type),
1478 ir->var->type->gl_type,
1479 ir->var->location - VERT_ATTRIB_GENERIC0);
1480 }
1481 } else {
1482 entry = new(mem_ctx) variable_storage(ir->var,
1483 PROGRAM_OUTPUT,
1484 ir->var->location);
1485 }
1486
1487 break;
1488 case ir_var_auto:
1489 case ir_var_temporary:
1490 entry = new(mem_ctx) variable_storage(ir->var, PROGRAM_TEMPORARY,
1491 this->next_temp);
1492 this->variables.push_tail(entry);
1493
1494 next_temp += type_size(ir->var->type);
1495 break;
1496 }
1497
1498 if (!entry) {
1499 printf("Failed to make storage for %s\n", ir->var->name);
1500 exit(1);
1501 }
1502 }
1503
1504 this->result = ir_to_mesa_src_reg(entry->file, entry->index, ir->var->type);
1505 }
1506
1507 void
1508 ir_to_mesa_visitor::visit(ir_dereference_array *ir)
1509 {
1510 ir_variable *var = ir->variable_referenced();
1511 ir_constant *index;
1512 ir_to_mesa_src_reg src_reg;
1513 ir_dereference_variable *deref_var = ir->array->as_dereference_variable();
1514 int element_size = type_size(ir->type);
1515
1516 index = ir->array_index->constant_expression_value();
1517
1518 if (deref_var && strncmp(deref_var->var->name,
1519 "gl_TextureMatrix",
1520 strlen("gl_TextureMatrix")) == 0) {
1521 struct variable_storage *entry;
1522
1523 entry = get_builtin_matrix_ref(this->mem_ctx, this->prog, deref_var->var,
1524 ir->array_index);
1525 assert(entry);
1526
1527 ir_to_mesa_src_reg src_reg(entry->file, entry->index, ir->type);
1528
1529 if (index) {
1530 src_reg.reladdr = NULL;
1531 } else {
1532 ir_to_mesa_src_reg index_reg = get_temp(glsl_type::float_type);
1533
1534 ir->array_index->accept(this);
1535 ir_to_mesa_emit_op2(ir, OPCODE_MUL,
1536 ir_to_mesa_dst_reg_from_src(index_reg),
1537 this->result, src_reg_for_float(element_size));
1538
1539 src_reg.reladdr = talloc(mem_ctx, ir_to_mesa_src_reg);
1540 memcpy(src_reg.reladdr, &index_reg, sizeof(index_reg));
1541 }
1542
1543 this->result = src_reg;
1544 return;
1545 }
1546
1547 if (strncmp(var->name, "gl_", 3) == 0 && var->mode == ir_var_uniform &&
1548 !var->type->is_matrix()) {
1549 ir_dereference_record *record = NULL;
1550 if (ir->array->ir_type == ir_type_dereference_record)
1551 record = (ir_dereference_record *)ir->array;
1552
1553 assert(index || !"FINISHME: variable-indexed builtin uniform access");
1554
1555 this->result = get_builtin_uniform_reg(prog,
1556 var->name,
1557 index->value.i[0],
1558 record ? record->field : NULL);
1559 }
1560
1561 ir->array->accept(this);
1562 src_reg = this->result;
1563
1564 if (index) {
1565 src_reg.index += index->value.i[0] * element_size;
1566 } else {
1567 ir_to_mesa_src_reg array_base = this->result;
1568 /* Variable index array dereference. It eats the "vec4" of the
1569 * base of the array and an index that offsets the Mesa register
1570 * index.
1571 */
1572 ir->array_index->accept(this);
1573
1574 ir_to_mesa_src_reg index_reg;
1575
1576 if (element_size == 1) {
1577 index_reg = this->result;
1578 } else {
1579 index_reg = get_temp(glsl_type::float_type);
1580
1581 ir_to_mesa_emit_op2(ir, OPCODE_MUL,
1582 ir_to_mesa_dst_reg_from_src(index_reg),
1583 this->result, src_reg_for_float(element_size));
1584 }
1585
1586 src_reg.reladdr = talloc(mem_ctx, ir_to_mesa_src_reg);
1587 memcpy(src_reg.reladdr, &index_reg, sizeof(index_reg));
1588 }
1589
1590 /* If the type is smaller than a vec4, replicate the last channel out. */
1591 if (ir->type->is_scalar() || ir->type->is_vector())
1592 src_reg.swizzle = swizzle_for_size(ir->type->vector_elements);
1593 else
1594 src_reg.swizzle = SWIZZLE_NOOP;
1595
1596 this->result = src_reg;
1597 }
1598
1599 void
1600 ir_to_mesa_visitor::visit(ir_dereference_record *ir)
1601 {
1602 unsigned int i;
1603 const glsl_type *struct_type = ir->record->type;
1604 int offset = 0;
1605 ir_variable *var = ir->record->variable_referenced();
1606
1607 if (strncmp(var->name, "gl_", 3) == 0 && var->mode == ir_var_uniform) {
1608 assert(var);
1609
1610 this->result = get_builtin_uniform_reg(prog,
1611 var->name,
1612 0,
1613 ir->field);
1614 return;
1615 }
1616
1617 ir->record->accept(this);
1618
1619 for (i = 0; i < struct_type->length; i++) {
1620 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1621 break;
1622 offset += type_size(struct_type->fields.structure[i].type);
1623 }
1624 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1625 this->result.index += offset;
1626 }
1627
1628 /**
1629 * We want to be careful in assignment setup to hit the actual storage
1630 * instead of potentially using a temporary like we might with the
1631 * ir_dereference handler.
1632 */
1633 static struct ir_to_mesa_dst_reg
1634 get_assignment_lhs(ir_dereference *ir, ir_to_mesa_visitor *v,
1635 ir_to_mesa_src_reg *r)
1636 {
1637 /* The LHS must be a dereference. If the LHS is a variable indexed array
1638 * access of a vector, it must be separated into a series conditional moves
1639 * before reaching this point (see ir_vec_index_to_cond_assign).
1640 */
1641 assert(ir->as_dereference());
1642 ir_dereference_array *deref_array = ir->as_dereference_array();
1643 if (deref_array) {
1644 assert(!deref_array->array->type->is_vector());
1645 }
1646
1647 /* Use the rvalue deref handler for the most part. We'll ignore
1648 * swizzles in it and write swizzles using writemask, though.
1649 */
1650 ir->accept(v);
1651 return ir_to_mesa_dst_reg_from_src(v->result);
1652 }
1653
1654 void
1655 ir_to_mesa_visitor::visit(ir_assignment *ir)
1656 {
1657 struct ir_to_mesa_dst_reg l;
1658 struct ir_to_mesa_src_reg r;
1659 int i;
1660
1661 ir->rhs->accept(this);
1662 r = this->result;
1663
1664 l = get_assignment_lhs(ir->lhs, this, &r);
1665
1666 /* FINISHME: This should really set to the correct maximal writemask for each
1667 * FINISHME: component written (in the loops below). This case can only
1668 * FINISHME: occur for matrices, arrays, and structures.
1669 */
1670 if (ir->write_mask == 0) {
1671 assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
1672 l.writemask = WRITEMASK_XYZW;
1673 } else if (ir->lhs->type->is_scalar()) {
1674 /* FINISHME: This hack makes writing to gl_FragData, which lives in the
1675 * FINISHME: W component of fragment shader output zero, work correctly.
1676 */
1677 l.writemask = WRITEMASK_XYZW;
1678 } else {
1679 assert(ir->lhs->type->is_vector());
1680 l.writemask = ir->write_mask;
1681 }
1682
1683 assert(l.file != PROGRAM_UNDEFINED);
1684 assert(r.file != PROGRAM_UNDEFINED);
1685
1686 if (ir->condition) {
1687 ir_to_mesa_src_reg condition;
1688
1689 ir->condition->accept(this);
1690 condition = this->result;
1691
1692 /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves,
1693 * and the condition we produced is 0.0 or 1.0. By flipping the
1694 * sign, we can choose which value OPCODE_CMP produces without
1695 * an extra computing the condition.
1696 */
1697 condition.negate = ~condition.negate;
1698 for (i = 0; i < type_size(ir->lhs->type); i++) {
1699 ir_to_mesa_emit_op3(ir, OPCODE_CMP, l,
1700 condition, r, ir_to_mesa_src_reg_from_dst(l));
1701 l.index++;
1702 r.index++;
1703 }
1704 } else {
1705 for (i = 0; i < type_size(ir->lhs->type); i++) {
1706 ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r);
1707 l.index++;
1708 r.index++;
1709 }
1710 }
1711 }
1712
1713
1714 void
1715 ir_to_mesa_visitor::visit(ir_constant *ir)
1716 {
1717 ir_to_mesa_src_reg src_reg;
1718 GLfloat stack_vals[4];
1719 GLfloat *values = stack_vals;
1720 unsigned int i;
1721
1722 /* Unfortunately, 4 floats is all we can get into
1723 * _mesa_add_unnamed_constant. So, make a temp to store an
1724 * aggregate constant and move each constant value into it. If we
1725 * get lucky, copy propagation will eliminate the extra moves.
1726 */
1727
1728 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1729 ir_to_mesa_src_reg temp_base = get_temp(ir->type);
1730 ir_to_mesa_dst_reg temp = ir_to_mesa_dst_reg_from_src(temp_base);
1731
1732 foreach_iter(exec_list_iterator, iter, ir->components) {
1733 ir_constant *field_value = (ir_constant *)iter.get();
1734 int size = type_size(field_value->type);
1735
1736 assert(size > 0);
1737
1738 field_value->accept(this);
1739 src_reg = this->result;
1740
1741 for (i = 0; i < (unsigned int)size; i++) {
1742 ir_to_mesa_emit_op1(ir, OPCODE_MOV, temp, src_reg);
1743
1744 src_reg.index++;
1745 temp.index++;
1746 }
1747 }
1748 this->result = temp_base;
1749 return;
1750 }
1751
1752 if (ir->type->is_array()) {
1753 ir_to_mesa_src_reg temp_base = get_temp(ir->type);
1754 ir_to_mesa_dst_reg temp = ir_to_mesa_dst_reg_from_src(temp_base);
1755 int size = type_size(ir->type->fields.array);
1756
1757 assert(size > 0);
1758
1759 for (i = 0; i < ir->type->length; i++) {
1760 ir->array_elements[i]->accept(this);
1761 src_reg = this->result;
1762 for (int j = 0; j < size; j++) {
1763 ir_to_mesa_emit_op1(ir, OPCODE_MOV, temp, src_reg);
1764
1765 src_reg.index++;
1766 temp.index++;
1767 }
1768 }
1769 this->result = temp_base;
1770 return;
1771 }
1772
1773 if (ir->type->is_matrix()) {
1774 ir_to_mesa_src_reg mat = get_temp(ir->type);
1775 ir_to_mesa_dst_reg mat_column = ir_to_mesa_dst_reg_from_src(mat);
1776
1777 for (i = 0; i < ir->type->matrix_columns; i++) {
1778 assert(ir->type->base_type == GLSL_TYPE_FLOAT);
1779 values = &ir->value.f[i * ir->type->vector_elements];
1780
1781 src_reg = ir_to_mesa_src_reg(PROGRAM_CONSTANT, -1, NULL);
1782 src_reg.index = _mesa_add_unnamed_constant(this->prog->Parameters,
1783 values,
1784 ir->type->vector_elements,
1785 &src_reg.swizzle);
1786 ir_to_mesa_emit_op1(ir, OPCODE_MOV, mat_column, src_reg);
1787
1788 mat_column.index++;
1789 }
1790
1791 this->result = mat;
1792 }
1793
1794 src_reg.file = PROGRAM_CONSTANT;
1795 switch (ir->type->base_type) {
1796 case GLSL_TYPE_FLOAT:
1797 values = &ir->value.f[0];
1798 break;
1799 case GLSL_TYPE_UINT:
1800 for (i = 0; i < ir->type->vector_elements; i++) {
1801 values[i] = ir->value.u[i];
1802 }
1803 break;
1804 case GLSL_TYPE_INT:
1805 for (i = 0; i < ir->type->vector_elements; i++) {
1806 values[i] = ir->value.i[i];
1807 }
1808 break;
1809 case GLSL_TYPE_BOOL:
1810 for (i = 0; i < ir->type->vector_elements; i++) {
1811 values[i] = ir->value.b[i];
1812 }
1813 break;
1814 default:
1815 assert(!"Non-float/uint/int/bool constant");
1816 }
1817
1818 this->result = ir_to_mesa_src_reg(PROGRAM_CONSTANT, -1, ir->type);
1819 this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters,
1820 values,
1821 ir->type->vector_elements,
1822 &this->result.swizzle);
1823 }
1824
1825 function_entry *
1826 ir_to_mesa_visitor::get_function_signature(ir_function_signature *sig)
1827 {
1828 function_entry *entry;
1829
1830 foreach_iter(exec_list_iterator, iter, this->function_signatures) {
1831 entry = (function_entry *)iter.get();
1832
1833 if (entry->sig == sig)
1834 return entry;
1835 }
1836
1837 entry = talloc(mem_ctx, function_entry);
1838 entry->sig = sig;
1839 entry->sig_id = this->next_signature_id++;
1840 entry->bgn_inst = NULL;
1841
1842 /* Allocate storage for all the parameters. */
1843 foreach_iter(exec_list_iterator, iter, sig->parameters) {
1844 ir_variable *param = (ir_variable *)iter.get();
1845 variable_storage *storage;
1846
1847 storage = find_variable_storage(param);
1848 assert(!storage);
1849
1850 storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY,
1851 this->next_temp);
1852 this->variables.push_tail(storage);
1853
1854 this->next_temp += type_size(param->type);
1855 }
1856
1857 if (!sig->return_type->is_void()) {
1858 entry->return_reg = get_temp(sig->return_type);
1859 } else {
1860 entry->return_reg = ir_to_mesa_undef;
1861 }
1862
1863 this->function_signatures.push_tail(entry);
1864 return entry;
1865 }
1866
1867 void
1868 ir_to_mesa_visitor::visit(ir_call *ir)
1869 {
1870 ir_to_mesa_instruction *call_inst;
1871 ir_function_signature *sig = ir->get_callee();
1872 function_entry *entry = get_function_signature(sig);
1873 int i;
1874
1875 /* Process in parameters. */
1876 exec_list_iterator sig_iter = sig->parameters.iterator();
1877 foreach_iter(exec_list_iterator, iter, *ir) {
1878 ir_rvalue *param_rval = (ir_rvalue *)iter.get();
1879 ir_variable *param = (ir_variable *)sig_iter.get();
1880
1881 if (param->mode == ir_var_in ||
1882 param->mode == ir_var_inout) {
1883 variable_storage *storage = find_variable_storage(param);
1884 assert(storage);
1885
1886 param_rval->accept(this);
1887 ir_to_mesa_src_reg r = this->result;
1888
1889 ir_to_mesa_dst_reg l;
1890 l.file = storage->file;
1891 l.index = storage->index;
1892 l.reladdr = NULL;
1893 l.writemask = WRITEMASK_XYZW;
1894 l.cond_mask = COND_TR;
1895
1896 for (i = 0; i < type_size(param->type); i++) {
1897 ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r);
1898 l.index++;
1899 r.index++;
1900 }
1901 }
1902
1903 sig_iter.next();
1904 }
1905 assert(!sig_iter.has_next());
1906
1907 /* Emit call instruction */
1908 call_inst = ir_to_mesa_emit_op1(ir, OPCODE_CAL,
1909 ir_to_mesa_undef_dst, ir_to_mesa_undef);
1910 call_inst->function = entry;
1911
1912 /* Process out parameters. */
1913 sig_iter = sig->parameters.iterator();
1914 foreach_iter(exec_list_iterator, iter, *ir) {
1915 ir_rvalue *param_rval = (ir_rvalue *)iter.get();
1916 ir_variable *param = (ir_variable *)sig_iter.get();
1917
1918 if (param->mode == ir_var_out ||
1919 param->mode == ir_var_inout) {
1920 variable_storage *storage = find_variable_storage(param);
1921 assert(storage);
1922
1923 ir_to_mesa_src_reg r;
1924 r.file = storage->file;
1925 r.index = storage->index;
1926 r.reladdr = NULL;
1927 r.swizzle = SWIZZLE_NOOP;
1928 r.negate = 0;
1929
1930 param_rval->accept(this);
1931 ir_to_mesa_dst_reg l = ir_to_mesa_dst_reg_from_src(this->result);
1932
1933 for (i = 0; i < type_size(param->type); i++) {
1934 ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r);
1935 l.index++;
1936 r.index++;
1937 }
1938 }
1939
1940 sig_iter.next();
1941 }
1942 assert(!sig_iter.has_next());
1943
1944 /* Process return value. */
1945 this->result = entry->return_reg;
1946 }
1947
1948
1949 void
1950 ir_to_mesa_visitor::visit(ir_texture *ir)
1951 {
1952 ir_to_mesa_src_reg result_src, coord, lod_info, projector;
1953 ir_to_mesa_dst_reg result_dst, coord_dst;
1954 ir_to_mesa_instruction *inst = NULL;
1955 prog_opcode opcode = OPCODE_NOP;
1956
1957 ir->coordinate->accept(this);
1958
1959 /* Put our coords in a temp. We'll need to modify them for shadow,
1960 * projection, or LOD, so the only case we'd use it as is is if
1961 * we're doing plain old texturing. Mesa IR optimization should
1962 * handle cleaning up our mess in that case.
1963 */
1964 coord = get_temp(glsl_type::vec4_type);
1965 coord_dst = ir_to_mesa_dst_reg_from_src(coord);
1966 ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst,
1967 this->result);
1968
1969 if (ir->projector) {
1970 ir->projector->accept(this);
1971 projector = this->result;
1972 }
1973
1974 /* Storage for our result. Ideally for an assignment we'd be using
1975 * the actual storage for the result here, instead.
1976 */
1977 result_src = get_temp(glsl_type::vec4_type);
1978 result_dst = ir_to_mesa_dst_reg_from_src(result_src);
1979
1980 switch (ir->op) {
1981 case ir_tex:
1982 opcode = OPCODE_TEX;
1983 break;
1984 case ir_txb:
1985 opcode = OPCODE_TXB;
1986 ir->lod_info.bias->accept(this);
1987 lod_info = this->result;
1988 break;
1989 case ir_txl:
1990 opcode = OPCODE_TXL;
1991 ir->lod_info.lod->accept(this);
1992 lod_info = this->result;
1993 break;
1994 case ir_txd:
1995 case ir_txf:
1996 assert(!"GLSL 1.30 features unsupported");
1997 break;
1998 }
1999
2000 if (ir->projector) {
2001 if (opcode == OPCODE_TEX) {
2002 /* Slot the projector in as the last component of the coord. */
2003 coord_dst.writemask = WRITEMASK_W;
2004 ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, projector);
2005 coord_dst.writemask = WRITEMASK_XYZW;
2006 opcode = OPCODE_TXP;
2007 } else {
2008 ir_to_mesa_src_reg coord_w = coord;
2009 coord_w.swizzle = SWIZZLE_WWWW;
2010
2011 /* For the other TEX opcodes there's no projective version
2012 * since the last slot is taken up by lod info. Do the
2013 * projective divide now.
2014 */
2015 coord_dst.writemask = WRITEMASK_W;
2016 ir_to_mesa_emit_op1(ir, OPCODE_RCP, coord_dst, projector);
2017
2018 coord_dst.writemask = WRITEMASK_XYZ;
2019 ir_to_mesa_emit_op2(ir, OPCODE_MUL, coord_dst, coord, coord_w);
2020
2021 coord_dst.writemask = WRITEMASK_XYZW;
2022 coord.swizzle = SWIZZLE_XYZW;
2023 }
2024 }
2025
2026 if (ir->shadow_comparitor) {
2027 /* Slot the shadow value in as the second to last component of the
2028 * coord.
2029 */
2030 ir->shadow_comparitor->accept(this);
2031 coord_dst.writemask = WRITEMASK_Z;
2032 ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, this->result);
2033 coord_dst.writemask = WRITEMASK_XYZW;
2034 }
2035
2036 if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) {
2037 /* Mesa IR stores lod or lod bias in the last channel of the coords. */
2038 coord_dst.writemask = WRITEMASK_W;
2039 ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, lod_info);
2040 coord_dst.writemask = WRITEMASK_XYZW;
2041 }
2042
2043 inst = ir_to_mesa_emit_op1(ir, opcode, result_dst, coord);
2044
2045 if (ir->shadow_comparitor)
2046 inst->tex_shadow = GL_TRUE;
2047
2048 ir_variable *sampler = ir->sampler->variable_referenced();
2049
2050 /* generate the mapping, remove when we generate storage at
2051 * declaration time
2052 */
2053 ir->sampler->accept(this);
2054
2055 inst->sampler = get_sampler_location(sampler);
2056
2057 ir_dereference_array *sampler_array = ir->sampler->as_dereference_array();
2058 if (sampler_array) {
2059 ir_constant *array_index =
2060 sampler_array->array_index->constant_expression_value();
2061
2062 /* GLSL 1.10 and 1.20 allowed variable sampler array indices,
2063 * while GLSL 1.30 requires that the array indices be constant
2064 * integer expressions. We don't expect any driver to actually
2065 * work with a really variable array index, and in 1.20 all that
2066 * would work would be an unrolled loop counter, so assert that
2067 * we ended up with a constant at least..
2068 */
2069 assert(array_index);
2070 inst->sampler += array_index->value.i[0];
2071 }
2072
2073 const glsl_type *sampler_type = sampler->type;
2074 while (sampler_type->base_type == GLSL_TYPE_ARRAY)
2075 sampler_type = sampler_type->fields.array;
2076
2077 switch (sampler_type->sampler_dimensionality) {
2078 case GLSL_SAMPLER_DIM_1D:
2079 inst->tex_target = (sampler_type->sampler_array)
2080 ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
2081 break;
2082 case GLSL_SAMPLER_DIM_2D:
2083 inst->tex_target = (sampler_type->sampler_array)
2084 ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
2085 break;
2086 case GLSL_SAMPLER_DIM_3D:
2087 inst->tex_target = TEXTURE_3D_INDEX;
2088 break;
2089 case GLSL_SAMPLER_DIM_CUBE:
2090 inst->tex_target = TEXTURE_CUBE_INDEX;
2091 break;
2092 default:
2093 assert(!"FINISHME: other texture targets");
2094 }
2095
2096 this->result = result_src;
2097 }
2098
2099 void
2100 ir_to_mesa_visitor::visit(ir_return *ir)
2101 {
2102 assert(current_function);
2103
2104 if (ir->get_value()) {
2105 ir_to_mesa_dst_reg l;
2106 int i;
2107
2108 ir->get_value()->accept(this);
2109 ir_to_mesa_src_reg r = this->result;
2110
2111 l = ir_to_mesa_dst_reg_from_src(current_function->return_reg);
2112
2113 for (i = 0; i < type_size(current_function->sig->return_type); i++) {
2114 ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r);
2115 l.index++;
2116 r.index++;
2117 }
2118 }
2119
2120 ir_to_mesa_emit_op0(ir, OPCODE_RET);
2121 }
2122
2123 void
2124 ir_to_mesa_visitor::visit(ir_discard *ir)
2125 {
2126 assert(ir->condition == NULL); /* FINISHME */
2127
2128 ir_to_mesa_emit_op0(ir, OPCODE_KIL_NV);
2129 }
2130
2131 void
2132 ir_to_mesa_visitor::visit(ir_if *ir)
2133 {
2134 ir_to_mesa_instruction *cond_inst, *if_inst, *else_inst = NULL;
2135 ir_to_mesa_instruction *prev_inst;
2136
2137 prev_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
2138
2139 ir->condition->accept(this);
2140 assert(this->result.file != PROGRAM_UNDEFINED);
2141
2142 if (ctx->Shader.EmitCondCodes) {
2143 cond_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
2144
2145 /* See if we actually generated any instruction for generating
2146 * the condition. If not, then cook up a move to a temp so we
2147 * have something to set cond_update on.
2148 */
2149 if (cond_inst == prev_inst) {
2150 ir_to_mesa_src_reg temp = get_temp(glsl_type::bool_type);
2151 cond_inst = ir_to_mesa_emit_op1(ir->condition, OPCODE_MOV,
2152 ir_to_mesa_dst_reg_from_src(temp),
2153 result);
2154 }
2155 cond_inst->cond_update = GL_TRUE;
2156
2157 if_inst = ir_to_mesa_emit_op0(ir->condition, OPCODE_IF);
2158 if_inst->dst_reg.cond_mask = COND_NE;
2159 } else {
2160 if_inst = ir_to_mesa_emit_op1(ir->condition,
2161 OPCODE_IF, ir_to_mesa_undef_dst,
2162 this->result);
2163 }
2164
2165 this->instructions.push_tail(if_inst);
2166
2167 visit_exec_list(&ir->then_instructions, this);
2168
2169 if (!ir->else_instructions.is_empty()) {
2170 else_inst = ir_to_mesa_emit_op0(ir->condition, OPCODE_ELSE);
2171 visit_exec_list(&ir->else_instructions, this);
2172 }
2173
2174 if_inst = ir_to_mesa_emit_op1(ir->condition, OPCODE_ENDIF,
2175 ir_to_mesa_undef_dst, ir_to_mesa_undef);
2176 }
2177
2178 ir_to_mesa_visitor::ir_to_mesa_visitor()
2179 {
2180 result.file = PROGRAM_UNDEFINED;
2181 next_temp = 1;
2182 next_signature_id = 1;
2183 sampler_map = NULL;
2184 current_function = NULL;
2185 }
2186
2187 ir_to_mesa_visitor::~ir_to_mesa_visitor()
2188 {
2189 if (this->sampler_map)
2190 hash_table_dtor(this->sampler_map);
2191 }
2192
2193 static struct prog_src_register
2194 mesa_src_reg_from_ir_src_reg(ir_to_mesa_src_reg reg)
2195 {
2196 struct prog_src_register mesa_reg;
2197
2198 mesa_reg.File = reg.file;
2199 assert(reg.index < (1 << INST_INDEX_BITS) - 1);
2200 mesa_reg.Index = reg.index;
2201 mesa_reg.Swizzle = reg.swizzle;
2202 mesa_reg.RelAddr = reg.reladdr != NULL;
2203 mesa_reg.Negate = reg.negate;
2204 mesa_reg.Abs = 0;
2205 mesa_reg.HasIndex2 = GL_FALSE;
2206
2207 return mesa_reg;
2208 }
2209
2210 static void
2211 set_branchtargets(ir_to_mesa_visitor *v,
2212 struct prog_instruction *mesa_instructions,
2213 int num_instructions)
2214 {
2215 int if_count = 0, loop_count = 0;
2216 int *if_stack, *loop_stack;
2217 int if_stack_pos = 0, loop_stack_pos = 0;
2218 int i, j;
2219
2220 for (i = 0; i < num_instructions; i++) {
2221 switch (mesa_instructions[i].Opcode) {
2222 case OPCODE_IF:
2223 if_count++;
2224 break;
2225 case OPCODE_BGNLOOP:
2226 loop_count++;
2227 break;
2228 case OPCODE_BRK:
2229 case OPCODE_CONT:
2230 mesa_instructions[i].BranchTarget = -1;
2231 break;
2232 default:
2233 break;
2234 }
2235 }
2236
2237 if_stack = (int *)calloc(if_count, sizeof(*if_stack));
2238 loop_stack = (int *)calloc(loop_count, sizeof(*loop_stack));
2239
2240 for (i = 0; i < num_instructions; i++) {
2241 switch (mesa_instructions[i].Opcode) {
2242 case OPCODE_IF:
2243 if_stack[if_stack_pos] = i;
2244 if_stack_pos++;
2245 break;
2246 case OPCODE_ELSE:
2247 mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
2248 if_stack[if_stack_pos - 1] = i;
2249 break;
2250 case OPCODE_ENDIF:
2251 mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
2252 if_stack_pos--;
2253 break;
2254 case OPCODE_BGNLOOP:
2255 loop_stack[loop_stack_pos] = i;
2256 loop_stack_pos++;
2257 break;
2258 case OPCODE_ENDLOOP:
2259 loop_stack_pos--;
2260 /* Rewrite any breaks/conts at this nesting level (haven't
2261 * already had a BranchTarget assigned) to point to the end
2262 * of the loop.
2263 */
2264 for (j = loop_stack[loop_stack_pos]; j < i; j++) {
2265 if (mesa_instructions[j].Opcode == OPCODE_BRK ||
2266 mesa_instructions[j].Opcode == OPCODE_CONT) {
2267 if (mesa_instructions[j].BranchTarget == -1) {
2268 mesa_instructions[j].BranchTarget = i;
2269 }
2270 }
2271 }
2272 /* The loop ends point at each other. */
2273 mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos];
2274 mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i;
2275 break;
2276 case OPCODE_CAL:
2277 foreach_iter(exec_list_iterator, iter, v->function_signatures) {
2278 function_entry *entry = (function_entry *)iter.get();
2279
2280 if (entry->sig_id == mesa_instructions[i].BranchTarget) {
2281 mesa_instructions[i].BranchTarget = entry->inst;
2282 break;
2283 }
2284 }
2285 break;
2286 default:
2287 break;
2288 }
2289 }
2290
2291 free(if_stack);
2292 }
2293
2294 static void
2295 print_program(struct prog_instruction *mesa_instructions,
2296 ir_instruction **mesa_instruction_annotation,
2297 int num_instructions)
2298 {
2299 ir_instruction *last_ir = NULL;
2300 int i;
2301 int indent = 0;
2302
2303 for (i = 0; i < num_instructions; i++) {
2304 struct prog_instruction *mesa_inst = mesa_instructions + i;
2305 ir_instruction *ir = mesa_instruction_annotation[i];
2306
2307 fprintf(stdout, "%3d: ", i);
2308
2309 if (last_ir != ir && ir) {
2310 int j;
2311
2312 for (j = 0; j < indent; j++) {
2313 fprintf(stdout, " ");
2314 }
2315 ir->print();
2316 printf("\n");
2317 last_ir = ir;
2318
2319 fprintf(stdout, " "); /* line number spacing. */
2320 }
2321
2322 indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent,
2323 PROG_PRINT_DEBUG, NULL);
2324 }
2325 }
2326
2327 static void
2328 count_resources(struct gl_program *prog)
2329 {
2330 unsigned int i;
2331
2332 prog->SamplersUsed = 0;
2333
2334 for (i = 0; i < prog->NumInstructions; i++) {
2335 struct prog_instruction *inst = &prog->Instructions[i];
2336
2337 /* Instead of just using the uniform's value to map to a
2338 * sampler, Mesa first allocates a separate number for the
2339 * sampler (_mesa_add_sampler), then we reindex it down to a
2340 * small integer (sampler_map[], SamplersUsed), then that gets
2341 * mapped to the uniform's value, and we get an actual sampler.
2342 */
2343 if (_mesa_is_tex_instruction(inst->Opcode)) {
2344 prog->SamplerTargets[inst->TexSrcUnit] =
2345 (gl_texture_index)inst->TexSrcTarget;
2346 prog->SamplersUsed |= 1 << inst->TexSrcUnit;
2347 if (inst->TexShadow) {
2348 prog->ShadowSamplers |= 1 << inst->TexSrcUnit;
2349 }
2350 }
2351 }
2352
2353 _mesa_update_shader_textures_used(prog);
2354 }
2355
2356 /* Each stage has some uniforms in its Parameters list. The Uniforms
2357 * list for the linked shader program has a pointer to these uniforms
2358 * in each of the stage's Parameters list, so that their values can be
2359 * updated when a uniform is set.
2360 */
2361 static void
2362 link_uniforms_to_shared_uniform_list(struct gl_uniform_list *uniforms,
2363 struct gl_program *prog)
2364 {
2365 unsigned int i;
2366
2367 for (i = 0; i < prog->Parameters->NumParameters; i++) {
2368 const struct gl_program_parameter *p = prog->Parameters->Parameters + i;
2369
2370 if (p->Type == PROGRAM_UNIFORM || p->Type == PROGRAM_SAMPLER) {
2371 struct gl_uniform *uniform =
2372 _mesa_append_uniform(uniforms, p->Name, prog->Target, i);
2373 if (uniform)
2374 uniform->Initialized = p->Initialized;
2375 }
2376 }
2377 }
2378
2379 struct gl_program *
2380 get_mesa_program(GLcontext *ctx, struct gl_shader_program *shader_program,
2381 struct gl_shader *shader)
2382 {
2383 void *mem_ctx = shader_program;
2384 ir_to_mesa_visitor v;
2385 struct prog_instruction *mesa_instructions, *mesa_inst;
2386 ir_instruction **mesa_instruction_annotation;
2387 int i;
2388 struct gl_program *prog;
2389 GLenum target;
2390 const char *target_string;
2391 GLboolean progress;
2392
2393 switch (shader->Type) {
2394 case GL_VERTEX_SHADER:
2395 target = GL_VERTEX_PROGRAM_ARB;
2396 target_string = "vertex";
2397 break;
2398 case GL_FRAGMENT_SHADER:
2399 target = GL_FRAGMENT_PROGRAM_ARB;
2400 target_string = "fragment";
2401 break;
2402 default:
2403 assert(!"should not be reached");
2404 break;
2405 }
2406
2407 validate_ir_tree(shader->ir);
2408
2409 prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
2410 if (!prog)
2411 return NULL;
2412 prog->Parameters = _mesa_new_parameter_list();
2413 prog->Varying = _mesa_new_parameter_list();
2414 prog->Attributes = _mesa_new_parameter_list();
2415 v.ctx = ctx;
2416 v.prog = prog;
2417
2418 v.mem_ctx = talloc_new(NULL);
2419
2420 /* Emit Mesa IR for main(). */
2421 visit_exec_list(shader->ir, &v);
2422 v.ir_to_mesa_emit_op0(NULL, OPCODE_END);
2423
2424 /* Now emit bodies for any functions that were used. */
2425 do {
2426 progress = GL_FALSE;
2427
2428 foreach_iter(exec_list_iterator, iter, v.function_signatures) {
2429 function_entry *entry = (function_entry *)iter.get();
2430
2431 if (!entry->bgn_inst) {
2432 v.current_function = entry;
2433
2434 entry->bgn_inst = v.ir_to_mesa_emit_op0(NULL, OPCODE_BGNSUB);
2435 entry->bgn_inst->function = entry;
2436
2437 visit_exec_list(&entry->sig->body, &v);
2438
2439 ir_to_mesa_instruction *last;
2440 last = (ir_to_mesa_instruction *)v.instructions.get_tail();
2441 if (last->op != OPCODE_RET)
2442 v.ir_to_mesa_emit_op0(NULL, OPCODE_RET);
2443
2444 ir_to_mesa_instruction *end;
2445 end = v.ir_to_mesa_emit_op0(NULL, OPCODE_ENDSUB);
2446 end->function = entry;
2447
2448 progress = GL_TRUE;
2449 }
2450 }
2451 } while (progress);
2452
2453 prog->NumTemporaries = v.next_temp;
2454
2455 int num_instructions = 0;
2456 foreach_iter(exec_list_iterator, iter, v.instructions) {
2457 num_instructions++;
2458 }
2459
2460 mesa_instructions =
2461 (struct prog_instruction *)calloc(num_instructions,
2462 sizeof(*mesa_instructions));
2463 mesa_instruction_annotation = talloc_array(mem_ctx, ir_instruction *,
2464 num_instructions);
2465
2466 mesa_inst = mesa_instructions;
2467 i = 0;
2468 foreach_iter(exec_list_iterator, iter, v.instructions) {
2469 ir_to_mesa_instruction *inst = (ir_to_mesa_instruction *)iter.get();
2470
2471 mesa_inst->Opcode = inst->op;
2472 mesa_inst->CondUpdate = inst->cond_update;
2473 mesa_inst->DstReg.File = inst->dst_reg.file;
2474 mesa_inst->DstReg.Index = inst->dst_reg.index;
2475 mesa_inst->DstReg.CondMask = inst->dst_reg.cond_mask;
2476 mesa_inst->DstReg.WriteMask = inst->dst_reg.writemask;
2477 mesa_inst->DstReg.RelAddr = inst->dst_reg.reladdr != NULL;
2478 mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src_reg[0]);
2479 mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src_reg[1]);
2480 mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src_reg[2]);
2481 mesa_inst->TexSrcUnit = inst->sampler;
2482 mesa_inst->TexSrcTarget = inst->tex_target;
2483 mesa_inst->TexShadow = inst->tex_shadow;
2484 mesa_instruction_annotation[i] = inst->ir;
2485
2486 if (ctx->Shader.EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) {
2487 shader_program->InfoLog =
2488 talloc_asprintf_append(shader_program->InfoLog,
2489 "Couldn't flatten if statement\n");
2490 shader_program->LinkStatus = false;
2491 }
2492
2493 switch (mesa_inst->Opcode) {
2494 case OPCODE_BGNSUB:
2495 inst->function->inst = i;
2496 mesa_inst->Comment = strdup(inst->function->sig->function_name());
2497 break;
2498 case OPCODE_ENDSUB:
2499 mesa_inst->Comment = strdup(inst->function->sig->function_name());
2500 break;
2501 case OPCODE_CAL:
2502 mesa_inst->BranchTarget = inst->function->sig_id; /* rewritten later */
2503 break;
2504 case OPCODE_ARL:
2505 prog->NumAddressRegs = 1;
2506 break;
2507 default:
2508 break;
2509 }
2510
2511 mesa_inst++;
2512 i++;
2513 }
2514
2515 set_branchtargets(&v, mesa_instructions, num_instructions);
2516
2517 if (ctx->Shader.Flags & GLSL_DUMP) {
2518 printf("\n");
2519 printf("GLSL IR for linked %s program %d:\n", target_string,
2520 shader_program->Name);
2521 _mesa_print_ir(shader->ir, NULL);
2522 printf("\n");
2523 printf("\n");
2524 printf("Mesa IR for linked %s program %d:\n", target_string,
2525 shader_program->Name);
2526 print_program(mesa_instructions, mesa_instruction_annotation,
2527 num_instructions);
2528 }
2529
2530 prog->Instructions = mesa_instructions;
2531 prog->NumInstructions = num_instructions;
2532
2533 do_set_program_inouts(shader->ir, prog);
2534 count_resources(prog);
2535
2536 _mesa_reference_program(ctx, &shader->Program, prog);
2537
2538 if ((ctx->Shader.Flags & GLSL_NO_OPT) == 0) {
2539 _mesa_optimize_program(ctx, prog);
2540 }
2541
2542 return prog;
2543 }
2544
2545 extern "C" {
2546
2547 void
2548 _mesa_glsl_compile_shader(GLcontext *ctx, struct gl_shader *shader)
2549 {
2550 struct _mesa_glsl_parse_state *state =
2551 new(shader) _mesa_glsl_parse_state(ctx, shader->Type, shader);
2552
2553 const char *source = shader->Source;
2554 state->error = preprocess(state, &source, &state->info_log,
2555 &ctx->Extensions);
2556
2557 if (!state->error) {
2558 _mesa_glsl_lexer_ctor(state, source);
2559 _mesa_glsl_parse(state);
2560 _mesa_glsl_lexer_dtor(state);
2561 }
2562
2563 shader->ir = new(shader) exec_list;
2564 if (!state->error && !state->translation_unit.is_empty())
2565 _mesa_ast_to_hir(shader->ir, state);
2566
2567 if (!state->error && !shader->ir->is_empty()) {
2568 validate_ir_tree(shader->ir);
2569
2570 /* Do some optimization at compile time to reduce shader IR size
2571 * and reduce later work if the same shader is linked multiple times
2572 */
2573 while (do_common_optimization(shader->ir, false))
2574 ;
2575
2576 validate_ir_tree(shader->ir);
2577 }
2578
2579 shader->symbols = state->symbols;
2580
2581 shader->CompileStatus = !state->error;
2582 shader->InfoLog = state->info_log;
2583 shader->Version = state->language_version;
2584 memcpy(shader->builtins_to_link, state->builtins_to_link,
2585 sizeof(shader->builtins_to_link[0]) * state->num_builtins_to_link);
2586 shader->num_builtins_to_link = state->num_builtins_to_link;
2587
2588 if (ctx->Shader.Flags & GLSL_LOG) {
2589 _mesa_write_shader_to_file(shader);
2590 }
2591
2592 if (ctx->Shader.Flags & GLSL_DUMP) {
2593 printf("GLSL source for shader %d:\n", shader->Name);
2594 printf("%s\n", shader->Source);
2595
2596 if (shader->CompileStatus) {
2597 printf("GLSL IR for shader %d:\n", shader->Name);
2598 _mesa_print_ir(shader->ir, NULL);
2599 printf("\n\n");
2600 }
2601 }
2602
2603 /* Retain any live IR, but trash the rest. */
2604 reparent_ir(shader->ir, shader);
2605
2606 talloc_free(state);
2607 }
2608
2609 void
2610 _mesa_glsl_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
2611 {
2612 unsigned int i;
2613
2614 _mesa_clear_shader_program_data(ctx, prog);
2615
2616 prog->LinkStatus = GL_TRUE;
2617
2618 for (i = 0; i < prog->NumShaders; i++) {
2619 if (!prog->Shaders[i]->CompileStatus) {
2620 prog->InfoLog =
2621 talloc_asprintf_append(prog->InfoLog,
2622 "linking with uncompiled shader");
2623 prog->LinkStatus = GL_FALSE;
2624 }
2625 }
2626
2627 prog->Varying = _mesa_new_parameter_list();
2628 _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL);
2629 _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL);
2630
2631 if (prog->LinkStatus) {
2632 link_shaders(prog);
2633
2634 /* We don't use the linker's uniforms list, and cook up our own at
2635 * generate time.
2636 */
2637 free(prog->Uniforms);
2638 prog->Uniforms = _mesa_new_uniform_list();
2639 }
2640
2641 if (prog->LinkStatus) {
2642 for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
2643 bool progress;
2644 exec_list *ir = prog->_LinkedShaders[i]->ir;
2645
2646 do {
2647 progress = false;
2648
2649 /* Lowering */
2650 do_mat_op_to_vec(ir);
2651 do_mod_to_fract(ir);
2652 do_div_to_mul_rcp(ir);
2653 do_explog_to_explog2(ir);
2654
2655 progress = do_common_optimization(ir, true) || progress;
2656
2657 if (ctx->Shader.EmitNoIfs)
2658 progress = do_if_to_cond_assign(ir) || progress;
2659
2660 progress = do_vec_index_to_cond_assign(ir) || progress;
2661 } while (progress);
2662 }
2663 }
2664
2665 if (prog->LinkStatus) {
2666 for (i = 0; i < prog->_NumLinkedShaders; i++) {
2667 struct gl_program *linked_prog;
2668 bool ok = true;
2669
2670 linked_prog = get_mesa_program(ctx, prog,
2671 prog->_LinkedShaders[i]);
2672
2673 link_uniforms_to_shared_uniform_list(prog->Uniforms, linked_prog);
2674
2675 switch (prog->_LinkedShaders[i]->Type) {
2676 case GL_VERTEX_SHADER:
2677 _mesa_reference_vertprog(ctx, &prog->VertexProgram,
2678 (struct gl_vertex_program *)linked_prog);
2679 ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB,
2680 linked_prog);
2681 break;
2682 case GL_FRAGMENT_SHADER:
2683 _mesa_reference_fragprog(ctx, &prog->FragmentProgram,
2684 (struct gl_fragment_program *)linked_prog);
2685 ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB,
2686 linked_prog);
2687 break;
2688 }
2689 if (!ok) {
2690 prog->LinkStatus = GL_FALSE;
2691 }
2692 }
2693 }
2694 }
2695
2696 } /* extern "C" */