glsl2: Add constant propagation.
[mesa.git] / src / mesa / program / ir_to_mesa.cpp
1 /*
2 * Copyright (C) 2005-2007 Brian Paul All Rights Reserved.
3 * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
4 * Copyright © 2010 Intel Corporation
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
24 */
25
26 /**
27 * \file ir_to_mesa.cpp
28 *
29 * Translates the IR to ARB_fragment_program text if possible,
30 * printing the result
31 */
32
33 #include <stdio.h>
34 #include "ir.h"
35 #include "ir_visitor.h"
36 #include "ir_print_visitor.h"
37 #include "ir_expression_flattening.h"
38 #include "glsl_types.h"
39 #include "glsl_parser_extras.h"
40 #include "../glsl/program.h"
41 #include "ir_optimization.h"
42 #include "ast.h"
43
44 extern "C" {
45 #include "main/mtypes.h"
46 #include "main/shaderapi.h"
47 #include "main/shaderobj.h"
48 #include "main/uniforms.h"
49 #include "program/hash_table.h"
50 #include "program/prog_instruction.h"
51 #include "program/prog_optimize.h"
52 #include "program/prog_print.h"
53 #include "program/program.h"
54 #include "program/prog_uniform.h"
55 #include "program/prog_parameter.h"
56 }
57
58 static int swizzle_for_size(int size);
59
60 /**
61 * This struct is a corresponding struct to Mesa prog_src_register, with
62 * wider fields.
63 */
64 typedef struct ir_to_mesa_src_reg {
65 ir_to_mesa_src_reg(int file, int index, const glsl_type *type)
66 {
67 this->file = file;
68 this->index = index;
69 if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
70 this->swizzle = swizzle_for_size(type->vector_elements);
71 else
72 this->swizzle = SWIZZLE_XYZW;
73 this->negate = 0;
74 this->reladdr = NULL;
75 }
76
77 ir_to_mesa_src_reg()
78 {
79 this->file = PROGRAM_UNDEFINED;
80 }
81
82 int file; /**< PROGRAM_* from Mesa */
83 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
84 GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
85 int negate; /**< NEGATE_XYZW mask from mesa */
86 /** Register index should be offset by the integer in this reg. */
87 ir_to_mesa_src_reg *reladdr;
88 } ir_to_mesa_src_reg;
89
90 typedef struct ir_to_mesa_dst_reg {
91 int file; /**< PROGRAM_* from Mesa */
92 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
93 int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
94 GLuint cond_mask:4;
95 /** Register index should be offset by the integer in this reg. */
96 ir_to_mesa_src_reg *reladdr;
97 } ir_to_mesa_dst_reg;
98
99 extern ir_to_mesa_src_reg ir_to_mesa_undef;
100
101 class ir_to_mesa_instruction : public exec_node {
102 public:
103 enum prog_opcode op;
104 ir_to_mesa_dst_reg dst_reg;
105 ir_to_mesa_src_reg src_reg[3];
106 /** Pointer to the ir source this tree came from for debugging */
107 ir_instruction *ir;
108 GLboolean cond_update;
109 int sampler; /**< sampler index */
110 int tex_target; /**< One of TEXTURE_*_INDEX */
111 GLboolean tex_shadow;
112
113 class function_entry *function; /* Set on OPCODE_CAL or OPCODE_BGNSUB */
114 };
115
116 class variable_storage : public exec_node {
117 public:
118 variable_storage(ir_variable *var, int file, int index)
119 : file(file), index(index), var(var)
120 {
121 /* empty */
122 }
123
124 int file;
125 int index;
126 ir_variable *var; /* variable that maps to this, if any */
127 };
128
129 class function_entry : public exec_node {
130 public:
131 ir_function_signature *sig;
132
133 /**
134 * identifier of this function signature used by the program.
135 *
136 * At the point that Mesa instructions for function calls are
137 * generated, we don't know the address of the first instruction of
138 * the function body. So we make the BranchTarget that is called a
139 * small integer and rewrite them during set_branchtargets().
140 */
141 int sig_id;
142
143 /**
144 * Pointer to first instruction of the function body.
145 *
146 * Set during function body emits after main() is processed.
147 */
148 ir_to_mesa_instruction *bgn_inst;
149
150 /**
151 * Index of the first instruction of the function body in actual
152 * Mesa IR.
153 *
154 * Set after convertion from ir_to_mesa_instruction to prog_instruction.
155 */
156 int inst;
157
158 /** Storage for the return value. */
159 ir_to_mesa_src_reg return_reg;
160 };
161
162 class ir_to_mesa_visitor : public ir_visitor {
163 public:
164 ir_to_mesa_visitor();
165 ~ir_to_mesa_visitor();
166
167 function_entry *current_function;
168
169 GLcontext *ctx;
170 struct gl_program *prog;
171
172 int next_temp;
173
174 variable_storage *find_variable_storage(ir_variable *var);
175
176 function_entry *get_function_signature(ir_function_signature *sig);
177
178 ir_to_mesa_src_reg get_temp(const glsl_type *type);
179 void reladdr_to_temp(ir_instruction *ir,
180 ir_to_mesa_src_reg *reg, int *num_reladdr);
181
182 struct ir_to_mesa_src_reg src_reg_for_float(float val);
183
184 /**
185 * \name Visit methods
186 *
187 * As typical for the visitor pattern, there must be one \c visit method for
188 * each concrete subclass of \c ir_instruction. Virtual base classes within
189 * the hierarchy should not have \c visit methods.
190 */
191 /*@{*/
192 virtual void visit(ir_variable *);
193 virtual void visit(ir_loop *);
194 virtual void visit(ir_loop_jump *);
195 virtual void visit(ir_function_signature *);
196 virtual void visit(ir_function *);
197 virtual void visit(ir_expression *);
198 virtual void visit(ir_swizzle *);
199 virtual void visit(ir_dereference_variable *);
200 virtual void visit(ir_dereference_array *);
201 virtual void visit(ir_dereference_record *);
202 virtual void visit(ir_assignment *);
203 virtual void visit(ir_constant *);
204 virtual void visit(ir_call *);
205 virtual void visit(ir_return *);
206 virtual void visit(ir_discard *);
207 virtual void visit(ir_texture *);
208 virtual void visit(ir_if *);
209 /*@}*/
210
211 struct ir_to_mesa_src_reg result;
212
213 /** List of variable_storage */
214 exec_list variables;
215
216 /** List of function_entry */
217 exec_list function_signatures;
218 int next_signature_id;
219
220 /** List of ir_to_mesa_instruction */
221 exec_list instructions;
222
223 ir_to_mesa_instruction *ir_to_mesa_emit_op0(ir_instruction *ir,
224 enum prog_opcode op);
225
226 ir_to_mesa_instruction *ir_to_mesa_emit_op1(ir_instruction *ir,
227 enum prog_opcode op,
228 ir_to_mesa_dst_reg dst,
229 ir_to_mesa_src_reg src0);
230
231 ir_to_mesa_instruction *ir_to_mesa_emit_op2(ir_instruction *ir,
232 enum prog_opcode op,
233 ir_to_mesa_dst_reg dst,
234 ir_to_mesa_src_reg src0,
235 ir_to_mesa_src_reg src1);
236
237 ir_to_mesa_instruction *ir_to_mesa_emit_op3(ir_instruction *ir,
238 enum prog_opcode op,
239 ir_to_mesa_dst_reg dst,
240 ir_to_mesa_src_reg src0,
241 ir_to_mesa_src_reg src1,
242 ir_to_mesa_src_reg src2);
243
244 void ir_to_mesa_emit_scalar_op1(ir_instruction *ir,
245 enum prog_opcode op,
246 ir_to_mesa_dst_reg dst,
247 ir_to_mesa_src_reg src0);
248
249 void ir_to_mesa_emit_scalar_op2(ir_instruction *ir,
250 enum prog_opcode op,
251 ir_to_mesa_dst_reg dst,
252 ir_to_mesa_src_reg src0,
253 ir_to_mesa_src_reg src1);
254
255 GLboolean try_emit_mad(ir_expression *ir,
256 int mul_operand);
257
258 int add_uniform(const char *name,
259 const glsl_type *type,
260 ir_constant *constant);
261 void add_aggregate_uniform(ir_instruction *ir,
262 const char *name,
263 const struct glsl_type *type,
264 ir_constant *constant,
265 struct ir_to_mesa_dst_reg temp);
266
267 struct hash_table *sampler_map;
268
269 void set_sampler_location(ir_variable *sampler, int location);
270 int get_sampler_location(ir_variable *sampler);
271
272 void *mem_ctx;
273 };
274
275 ir_to_mesa_src_reg ir_to_mesa_undef = ir_to_mesa_src_reg(PROGRAM_UNDEFINED, 0, NULL);
276
277 ir_to_mesa_dst_reg ir_to_mesa_undef_dst = {
278 PROGRAM_UNDEFINED, 0, SWIZZLE_NOOP, COND_TR, NULL,
279 };
280
281 ir_to_mesa_dst_reg ir_to_mesa_address_reg = {
282 PROGRAM_ADDRESS, 0, WRITEMASK_X, COND_TR, NULL
283 };
284
285 static int swizzle_for_size(int size)
286 {
287 int size_swizzles[4] = {
288 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
289 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
290 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
291 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
292 };
293
294 return size_swizzles[size - 1];
295 }
296
297 ir_to_mesa_instruction *
298 ir_to_mesa_visitor::ir_to_mesa_emit_op3(ir_instruction *ir,
299 enum prog_opcode op,
300 ir_to_mesa_dst_reg dst,
301 ir_to_mesa_src_reg src0,
302 ir_to_mesa_src_reg src1,
303 ir_to_mesa_src_reg src2)
304 {
305 ir_to_mesa_instruction *inst = new(mem_ctx) ir_to_mesa_instruction();
306 int num_reladdr = 0;
307
308 /* If we have to do relative addressing, we want to load the ARL
309 * reg directly for one of the regs, and preload the other reladdr
310 * sources into temps.
311 */
312 num_reladdr += dst.reladdr != NULL;
313 num_reladdr += src0.reladdr != NULL;
314 num_reladdr += src1.reladdr != NULL;
315 num_reladdr += src2.reladdr != NULL;
316
317 reladdr_to_temp(ir, &src2, &num_reladdr);
318 reladdr_to_temp(ir, &src1, &num_reladdr);
319 reladdr_to_temp(ir, &src0, &num_reladdr);
320
321 if (dst.reladdr) {
322 ir_to_mesa_emit_op1(ir, OPCODE_ARL, ir_to_mesa_address_reg,
323 *dst.reladdr);
324
325 num_reladdr--;
326 }
327 assert(num_reladdr == 0);
328
329 inst->op = op;
330 inst->dst_reg = dst;
331 inst->src_reg[0] = src0;
332 inst->src_reg[1] = src1;
333 inst->src_reg[2] = src2;
334 inst->ir = ir;
335
336 inst->function = NULL;
337
338 this->instructions.push_tail(inst);
339
340 return inst;
341 }
342
343
344 ir_to_mesa_instruction *
345 ir_to_mesa_visitor::ir_to_mesa_emit_op2(ir_instruction *ir,
346 enum prog_opcode op,
347 ir_to_mesa_dst_reg dst,
348 ir_to_mesa_src_reg src0,
349 ir_to_mesa_src_reg src1)
350 {
351 return ir_to_mesa_emit_op3(ir, op, dst, src0, src1, ir_to_mesa_undef);
352 }
353
354 ir_to_mesa_instruction *
355 ir_to_mesa_visitor::ir_to_mesa_emit_op1(ir_instruction *ir,
356 enum prog_opcode op,
357 ir_to_mesa_dst_reg dst,
358 ir_to_mesa_src_reg src0)
359 {
360 assert(dst.writemask != 0);
361 return ir_to_mesa_emit_op3(ir, op, dst,
362 src0, ir_to_mesa_undef, ir_to_mesa_undef);
363 }
364
365 ir_to_mesa_instruction *
366 ir_to_mesa_visitor::ir_to_mesa_emit_op0(ir_instruction *ir,
367 enum prog_opcode op)
368 {
369 return ir_to_mesa_emit_op3(ir, op, ir_to_mesa_undef_dst,
370 ir_to_mesa_undef,
371 ir_to_mesa_undef,
372 ir_to_mesa_undef);
373 }
374
375 void
376 ir_to_mesa_visitor::set_sampler_location(ir_variable *sampler, int location)
377 {
378 if (this->sampler_map == NULL) {
379 this->sampler_map = hash_table_ctor(0, hash_table_pointer_hash,
380 hash_table_pointer_compare);
381 }
382
383 hash_table_insert(this->sampler_map, (void *)(uintptr_t)location, sampler);
384 }
385
386 int
387 ir_to_mesa_visitor::get_sampler_location(ir_variable *sampler)
388 {
389 void *result = hash_table_find(this->sampler_map, sampler);
390
391 return (int)(uintptr_t)result;
392 }
393
394 inline ir_to_mesa_dst_reg
395 ir_to_mesa_dst_reg_from_src(ir_to_mesa_src_reg reg)
396 {
397 ir_to_mesa_dst_reg dst_reg;
398
399 dst_reg.file = reg.file;
400 dst_reg.index = reg.index;
401 dst_reg.writemask = WRITEMASK_XYZW;
402 dst_reg.cond_mask = COND_TR;
403 dst_reg.reladdr = reg.reladdr;
404
405 return dst_reg;
406 }
407
408 inline ir_to_mesa_src_reg
409 ir_to_mesa_src_reg_from_dst(ir_to_mesa_dst_reg reg)
410 {
411 return ir_to_mesa_src_reg(reg.file, reg.index, NULL);
412 }
413
414 /**
415 * Emits Mesa scalar opcodes to produce unique answers across channels.
416 *
417 * Some Mesa opcodes are scalar-only, like ARB_fp/vp. The src X
418 * channel determines the result across all channels. So to do a vec4
419 * of this operation, we want to emit a scalar per source channel used
420 * to produce dest channels.
421 */
422 void
423 ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op2(ir_instruction *ir,
424 enum prog_opcode op,
425 ir_to_mesa_dst_reg dst,
426 ir_to_mesa_src_reg orig_src0,
427 ir_to_mesa_src_reg orig_src1)
428 {
429 int i, j;
430 int done_mask = ~dst.writemask;
431
432 /* Mesa RCP is a scalar operation splatting results to all channels,
433 * like ARB_fp/vp. So emit as many RCPs as necessary to cover our
434 * dst channels.
435 */
436 for (i = 0; i < 4; i++) {
437 GLuint this_mask = (1 << i);
438 ir_to_mesa_instruction *inst;
439 ir_to_mesa_src_reg src0 = orig_src0;
440 ir_to_mesa_src_reg src1 = orig_src1;
441
442 if (done_mask & this_mask)
443 continue;
444
445 GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
446 GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
447 for (j = i + 1; j < 4; j++) {
448 if (!(done_mask & (1 << j)) &&
449 GET_SWZ(src0.swizzle, j) == src0_swiz &&
450 GET_SWZ(src1.swizzle, j) == src1_swiz) {
451 this_mask |= (1 << j);
452 }
453 }
454 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
455 src0_swiz, src0_swiz);
456 src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
457 src1_swiz, src1_swiz);
458
459 inst = ir_to_mesa_emit_op2(ir, op,
460 dst,
461 src0,
462 src1);
463 inst->dst_reg.writemask = this_mask;
464 done_mask |= this_mask;
465 }
466 }
467
468 void
469 ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op1(ir_instruction *ir,
470 enum prog_opcode op,
471 ir_to_mesa_dst_reg dst,
472 ir_to_mesa_src_reg src0)
473 {
474 ir_to_mesa_src_reg undef = ir_to_mesa_undef;
475
476 undef.swizzle = SWIZZLE_XXXX;
477
478 ir_to_mesa_emit_scalar_op2(ir, op, dst, src0, undef);
479 }
480
481 struct ir_to_mesa_src_reg
482 ir_to_mesa_visitor::src_reg_for_float(float val)
483 {
484 ir_to_mesa_src_reg src_reg(PROGRAM_CONSTANT, -1, NULL);
485
486 src_reg.index = _mesa_add_unnamed_constant(this->prog->Parameters,
487 &val, 1, &src_reg.swizzle);
488
489 return src_reg;
490 }
491
492 static int
493 type_size(const struct glsl_type *type)
494 {
495 unsigned int i;
496 int size;
497
498 switch (type->base_type) {
499 case GLSL_TYPE_UINT:
500 case GLSL_TYPE_INT:
501 case GLSL_TYPE_FLOAT:
502 case GLSL_TYPE_BOOL:
503 if (type->is_matrix()) {
504 return type->matrix_columns;
505 } else {
506 /* Regardless of size of vector, it gets a vec4. This is bad
507 * packing for things like floats, but otherwise arrays become a
508 * mess. Hopefully a later pass over the code can pack scalars
509 * down if appropriate.
510 */
511 return 1;
512 }
513 case GLSL_TYPE_ARRAY:
514 return type_size(type->fields.array) * type->length;
515 case GLSL_TYPE_STRUCT:
516 size = 0;
517 for (i = 0; i < type->length; i++) {
518 size += type_size(type->fields.structure[i].type);
519 }
520 return size;
521 case GLSL_TYPE_SAMPLER:
522 /* Samplers take up no register space, since they're baked in at
523 * link time.
524 */
525 return 0;
526 default:
527 assert(0);
528 }
529 }
530
531 /**
532 * In the initial pass of codegen, we assign temporary numbers to
533 * intermediate results. (not SSA -- variable assignments will reuse
534 * storage). Actual register allocation for the Mesa VM occurs in a
535 * pass over the Mesa IR later.
536 */
537 ir_to_mesa_src_reg
538 ir_to_mesa_visitor::get_temp(const glsl_type *type)
539 {
540 ir_to_mesa_src_reg src_reg;
541 int swizzle[4];
542 int i;
543
544 src_reg.file = PROGRAM_TEMPORARY;
545 src_reg.index = next_temp;
546 src_reg.reladdr = NULL;
547 next_temp += type_size(type);
548
549 if (type->is_array() || type->is_record()) {
550 src_reg.swizzle = SWIZZLE_NOOP;
551 } else {
552 for (i = 0; i < type->vector_elements; i++)
553 swizzle[i] = i;
554 for (; i < 4; i++)
555 swizzle[i] = type->vector_elements - 1;
556 src_reg.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1],
557 swizzle[2], swizzle[3]);
558 }
559 src_reg.negate = 0;
560
561 return src_reg;
562 }
563
564 variable_storage *
565 ir_to_mesa_visitor::find_variable_storage(ir_variable *var)
566 {
567
568 variable_storage *entry;
569
570 foreach_iter(exec_list_iterator, iter, this->variables) {
571 entry = (variable_storage *)iter.get();
572
573 if (entry->var == var)
574 return entry;
575 }
576
577 return NULL;
578 }
579
580 void
581 ir_to_mesa_visitor::visit(ir_variable *ir)
582 {
583 if (strcmp(ir->name, "gl_FragCoord") == 0) {
584 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
585
586 fp->OriginUpperLeft = ir->origin_upper_left;
587 fp->PixelCenterInteger = ir->pixel_center_integer;
588 }
589 }
590
591 void
592 ir_to_mesa_visitor::visit(ir_loop *ir)
593 {
594 assert(!ir->from);
595 assert(!ir->to);
596 assert(!ir->increment);
597 assert(!ir->counter);
598
599 ir_to_mesa_emit_op0(NULL, OPCODE_BGNLOOP);
600 visit_exec_list(&ir->body_instructions, this);
601 ir_to_mesa_emit_op0(NULL, OPCODE_ENDLOOP);
602 }
603
604 void
605 ir_to_mesa_visitor::visit(ir_loop_jump *ir)
606 {
607 switch (ir->mode) {
608 case ir_loop_jump::jump_break:
609 ir_to_mesa_emit_op0(NULL, OPCODE_BRK);
610 break;
611 case ir_loop_jump::jump_continue:
612 ir_to_mesa_emit_op0(NULL, OPCODE_CONT);
613 break;
614 }
615 }
616
617
618 void
619 ir_to_mesa_visitor::visit(ir_function_signature *ir)
620 {
621 assert(0);
622 (void)ir;
623 }
624
625 void
626 ir_to_mesa_visitor::visit(ir_function *ir)
627 {
628 /* Ignore function bodies other than main() -- we shouldn't see calls to
629 * them since they should all be inlined before we get to ir_to_mesa.
630 */
631 if (strcmp(ir->name, "main") == 0) {
632 const ir_function_signature *sig;
633 exec_list empty;
634
635 sig = ir->matching_signature(&empty);
636
637 assert(sig);
638
639 foreach_iter(exec_list_iterator, iter, sig->body) {
640 ir_instruction *ir = (ir_instruction *)iter.get();
641
642 ir->accept(this);
643 }
644 }
645 }
646
647 GLboolean
648 ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
649 {
650 int nonmul_operand = 1 - mul_operand;
651 ir_to_mesa_src_reg a, b, c;
652
653 ir_expression *expr = ir->operands[mul_operand]->as_expression();
654 if (!expr || expr->operation != ir_binop_mul)
655 return false;
656
657 expr->operands[0]->accept(this);
658 a = this->result;
659 expr->operands[1]->accept(this);
660 b = this->result;
661 ir->operands[nonmul_operand]->accept(this);
662 c = this->result;
663
664 this->result = get_temp(ir->type);
665 ir_to_mesa_emit_op3(ir, OPCODE_MAD,
666 ir_to_mesa_dst_reg_from_src(this->result), a, b, c);
667
668 return true;
669 }
670
671 void
672 ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir,
673 ir_to_mesa_src_reg *reg, int *num_reladdr)
674 {
675 if (!reg->reladdr)
676 return;
677
678 ir_to_mesa_emit_op1(ir, OPCODE_ARL, ir_to_mesa_address_reg, *reg->reladdr);
679
680 if (*num_reladdr != 1) {
681 ir_to_mesa_src_reg temp = get_temp(glsl_type::vec4_type);
682
683 ir_to_mesa_emit_op1(ir, OPCODE_MOV,
684 ir_to_mesa_dst_reg_from_src(temp), *reg);
685 *reg = temp;
686 }
687
688 (*num_reladdr)--;
689 }
690
691 void
692 ir_to_mesa_visitor::visit(ir_expression *ir)
693 {
694 unsigned int operand;
695 struct ir_to_mesa_src_reg op[2];
696 struct ir_to_mesa_src_reg result_src;
697 struct ir_to_mesa_dst_reg result_dst;
698 const glsl_type *vec4_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 4, 1);
699 const glsl_type *vec3_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 3, 1);
700 const glsl_type *vec2_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 2, 1);
701
702 /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c)
703 */
704 if (ir->operation == ir_binop_add) {
705 if (try_emit_mad(ir, 1))
706 return;
707 if (try_emit_mad(ir, 0))
708 return;
709 }
710
711 for (operand = 0; operand < ir->get_num_operands(); operand++) {
712 this->result.file = PROGRAM_UNDEFINED;
713 ir->operands[operand]->accept(this);
714 if (this->result.file == PROGRAM_UNDEFINED) {
715 ir_print_visitor v;
716 printf("Failed to get tree for expression operand:\n");
717 ir->operands[operand]->accept(&v);
718 exit(1);
719 }
720 op[operand] = this->result;
721
722 /* Matrix expression operands should have been broken down to vector
723 * operations already.
724 */
725 assert(!ir->operands[operand]->type->is_matrix());
726 }
727
728 this->result.file = PROGRAM_UNDEFINED;
729
730 /* Storage for our result. Ideally for an assignment we'd be using
731 * the actual storage for the result here, instead.
732 */
733 result_src = get_temp(ir->type);
734 /* convenience for the emit functions below. */
735 result_dst = ir_to_mesa_dst_reg_from_src(result_src);
736 /* Limit writes to the channels that will be used by result_src later.
737 * This does limit this temp's use as a temporary for multi-instruction
738 * sequences.
739 */
740 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
741
742 switch (ir->operation) {
743 case ir_unop_logic_not:
744 ir_to_mesa_emit_op2(ir, OPCODE_SEQ, result_dst,
745 op[0], src_reg_for_float(0.0));
746 break;
747 case ir_unop_neg:
748 op[0].negate = ~op[0].negate;
749 result_src = op[0];
750 break;
751 case ir_unop_abs:
752 ir_to_mesa_emit_op1(ir, OPCODE_ABS, result_dst, op[0]);
753 break;
754 case ir_unop_sign:
755 ir_to_mesa_emit_op1(ir, OPCODE_SSG, result_dst, op[0]);
756 break;
757 case ir_unop_rcp:
758 ir_to_mesa_emit_scalar_op1(ir, OPCODE_RCP, result_dst, op[0]);
759 break;
760
761 case ir_unop_exp2:
762 ir_to_mesa_emit_scalar_op1(ir, OPCODE_EX2, result_dst, op[0]);
763 break;
764 case ir_unop_exp:
765 case ir_unop_log:
766 assert(!"not reached: should be handled by ir_explog_to_explog2");
767 break;
768 case ir_unop_log2:
769 ir_to_mesa_emit_scalar_op1(ir, OPCODE_LG2, result_dst, op[0]);
770 break;
771 case ir_unop_sin:
772 ir_to_mesa_emit_scalar_op1(ir, OPCODE_SIN, result_dst, op[0]);
773 break;
774 case ir_unop_cos:
775 ir_to_mesa_emit_scalar_op1(ir, OPCODE_COS, result_dst, op[0]);
776 break;
777
778 case ir_unop_dFdx:
779 ir_to_mesa_emit_op1(ir, OPCODE_DDX, result_dst, op[0]);
780 break;
781 case ir_unop_dFdy:
782 ir_to_mesa_emit_op1(ir, OPCODE_DDY, result_dst, op[0]);
783 break;
784
785 case ir_binop_add:
786 ir_to_mesa_emit_op2(ir, OPCODE_ADD, result_dst, op[0], op[1]);
787 break;
788 case ir_binop_sub:
789 ir_to_mesa_emit_op2(ir, OPCODE_SUB, result_dst, op[0], op[1]);
790 break;
791
792 case ir_binop_mul:
793 ir_to_mesa_emit_op2(ir, OPCODE_MUL, result_dst, op[0], op[1]);
794 break;
795 case ir_binop_div:
796 assert(!"not reached: should be handled by ir_div_to_mul_rcp");
797 case ir_binop_mod:
798 assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
799 break;
800
801 case ir_binop_less:
802 ir_to_mesa_emit_op2(ir, OPCODE_SLT, result_dst, op[0], op[1]);
803 break;
804 case ir_binop_greater:
805 ir_to_mesa_emit_op2(ir, OPCODE_SGT, result_dst, op[0], op[1]);
806 break;
807 case ir_binop_lequal:
808 ir_to_mesa_emit_op2(ir, OPCODE_SLE, result_dst, op[0], op[1]);
809 break;
810 case ir_binop_gequal:
811 ir_to_mesa_emit_op2(ir, OPCODE_SGE, result_dst, op[0], op[1]);
812 break;
813 case ir_binop_equal:
814 ir_to_mesa_emit_op2(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
815 break;
816 case ir_binop_logic_xor:
817 case ir_binop_nequal:
818 ir_to_mesa_emit_op2(ir, OPCODE_SNE, result_dst, op[0], op[1]);
819 break;
820
821 case ir_binop_logic_or:
822 /* This could be a saturated add and skip the SNE. */
823 ir_to_mesa_emit_op2(ir, OPCODE_ADD,
824 result_dst,
825 op[0], op[1]);
826
827 ir_to_mesa_emit_op2(ir, OPCODE_SNE,
828 result_dst,
829 result_src, src_reg_for_float(0.0));
830 break;
831
832 case ir_binop_logic_and:
833 /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
834 ir_to_mesa_emit_op2(ir, OPCODE_MUL,
835 result_dst,
836 op[0], op[1]);
837 break;
838
839 case ir_binop_dot:
840 if (ir->operands[0]->type == vec4_type) {
841 assert(ir->operands[1]->type == vec4_type);
842 ir_to_mesa_emit_op2(ir, OPCODE_DP4,
843 result_dst,
844 op[0], op[1]);
845 } else if (ir->operands[0]->type == vec3_type) {
846 assert(ir->operands[1]->type == vec3_type);
847 ir_to_mesa_emit_op2(ir, OPCODE_DP3,
848 result_dst,
849 op[0], op[1]);
850 } else if (ir->operands[0]->type == vec2_type) {
851 assert(ir->operands[1]->type == vec2_type);
852 ir_to_mesa_emit_op2(ir, OPCODE_DP2,
853 result_dst,
854 op[0], op[1]);
855 }
856 break;
857
858 case ir_binop_cross:
859 ir_to_mesa_emit_op2(ir, OPCODE_XPD, result_dst, op[0], op[1]);
860 break;
861
862 case ir_unop_sqrt:
863 ir_to_mesa_emit_scalar_op1(ir, OPCODE_RSQ, result_dst, op[0]);
864 ir_to_mesa_emit_scalar_op1(ir, OPCODE_RCP, result_dst, result_src);
865 /* For incoming channels < 0, set the result to 0. */
866 ir_to_mesa_emit_op3(ir, OPCODE_CMP, result_dst,
867 op[0], src_reg_for_float(0.0), result_src);
868 break;
869 case ir_unop_rsq:
870 ir_to_mesa_emit_scalar_op1(ir, OPCODE_RSQ, result_dst, op[0]);
871 break;
872 case ir_unop_i2f:
873 case ir_unop_b2f:
874 case ir_unop_b2i:
875 /* Mesa IR lacks types, ints are stored as truncated floats. */
876 result_src = op[0];
877 break;
878 case ir_unop_f2i:
879 ir_to_mesa_emit_op1(ir, OPCODE_TRUNC, result_dst, op[0]);
880 break;
881 case ir_unop_f2b:
882 case ir_unop_i2b:
883 ir_to_mesa_emit_op2(ir, OPCODE_SNE, result_dst,
884 result_src, src_reg_for_float(0.0));
885 break;
886 case ir_unop_trunc:
887 ir_to_mesa_emit_op1(ir, OPCODE_TRUNC, result_dst, op[0]);
888 break;
889 case ir_unop_ceil:
890 op[0].negate = ~op[0].negate;
891 ir_to_mesa_emit_op1(ir, OPCODE_FLR, result_dst, op[0]);
892 result_src.negate = ~result_src.negate;
893 break;
894 case ir_unop_floor:
895 ir_to_mesa_emit_op1(ir, OPCODE_FLR, result_dst, op[0]);
896 break;
897 case ir_unop_fract:
898 ir_to_mesa_emit_op1(ir, OPCODE_FRC, result_dst, op[0]);
899 break;
900
901 case ir_binop_min:
902 ir_to_mesa_emit_op2(ir, OPCODE_MIN, result_dst, op[0], op[1]);
903 break;
904 case ir_binop_max:
905 ir_to_mesa_emit_op2(ir, OPCODE_MAX, result_dst, op[0], op[1]);
906 break;
907 case ir_binop_pow:
908 ir_to_mesa_emit_scalar_op2(ir, OPCODE_POW, result_dst, op[0], op[1]);
909 break;
910
911 case ir_unop_bit_not:
912 case ir_unop_u2f:
913 case ir_binop_lshift:
914 case ir_binop_rshift:
915 case ir_binop_bit_and:
916 case ir_binop_bit_xor:
917 case ir_binop_bit_or:
918 assert(!"GLSL 1.30 features unsupported");
919 break;
920 }
921
922 this->result = result_src;
923 }
924
925
926 void
927 ir_to_mesa_visitor::visit(ir_swizzle *ir)
928 {
929 ir_to_mesa_src_reg src_reg;
930 int i;
931 int swizzle[4];
932
933 /* Note that this is only swizzles in expressions, not those on the left
934 * hand side of an assignment, which do write masking. See ir_assignment
935 * for that.
936 */
937
938 ir->val->accept(this);
939 src_reg = this->result;
940 assert(src_reg.file != PROGRAM_UNDEFINED);
941
942 for (i = 0; i < 4; i++) {
943 if (i < ir->type->vector_elements) {
944 switch (i) {
945 case 0:
946 swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.x);
947 break;
948 case 1:
949 swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.y);
950 break;
951 case 2:
952 swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.z);
953 break;
954 case 3:
955 swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.w);
956 break;
957 }
958 } else {
959 /* If the type is smaller than a vec4, replicate the last
960 * channel out.
961 */
962 swizzle[i] = swizzle[ir->type->vector_elements - 1];
963 }
964 }
965
966 src_reg.swizzle = MAKE_SWIZZLE4(swizzle[0],
967 swizzle[1],
968 swizzle[2],
969 swizzle[3]);
970
971 this->result = src_reg;
972 }
973
974 static const struct {
975 const char *name;
976 const char *field;
977 int tokens[STATE_LENGTH];
978 int swizzle;
979 bool array_indexed;
980 } statevars[] = {
981 {"gl_DepthRange", "near",
982 {STATE_DEPTH_RANGE, 0, 0}, SWIZZLE_XXXX},
983 {"gl_DepthRange", "far",
984 {STATE_DEPTH_RANGE, 0, 0}, SWIZZLE_YYYY},
985 {"gl_DepthRange", "diff",
986 {STATE_DEPTH_RANGE, 0, 0}, SWIZZLE_ZZZZ},
987
988 {"gl_ClipPlane", NULL,
989 {STATE_CLIPPLANE, 0, 0}, SWIZZLE_XYZW, true}
990 ,
991 {"gl_Point", "size",
992 {STATE_POINT_SIZE}, SWIZZLE_XXXX},
993 {"gl_Point", "sizeMin",
994 {STATE_POINT_SIZE}, SWIZZLE_YYYY},
995 {"gl_Point", "sizeMax",
996 {STATE_POINT_SIZE}, SWIZZLE_ZZZZ},
997 {"gl_Point", "fadeThresholdSize",
998 {STATE_POINT_SIZE}, SWIZZLE_WWWW},
999 {"gl_Point", "distanceConstantAttenuation",
1000 {STATE_POINT_ATTENUATION}, SWIZZLE_XXXX},
1001 {"gl_Point", "distanceLinearAttenuation",
1002 {STATE_POINT_ATTENUATION}, SWIZZLE_YYYY},
1003 {"gl_Point", "distanceQuadraticAttenuation",
1004 {STATE_POINT_ATTENUATION}, SWIZZLE_ZZZZ},
1005
1006 {"gl_FrontMaterial", "emission",
1007 {STATE_MATERIAL, 0, STATE_EMISSION}, SWIZZLE_XYZW},
1008 {"gl_FrontMaterial", "ambient",
1009 {STATE_MATERIAL, 0, STATE_AMBIENT}, SWIZZLE_XYZW},
1010 {"gl_FrontMaterial", "diffuse",
1011 {STATE_MATERIAL, 0, STATE_DIFFUSE}, SWIZZLE_XYZW},
1012 {"gl_FrontMaterial", "specular",
1013 {STATE_MATERIAL, 0, STATE_SPECULAR}, SWIZZLE_XYZW},
1014 {"gl_FrontMaterial", "shininess",
1015 {STATE_MATERIAL, 0, STATE_SHININESS}, SWIZZLE_XXXX},
1016
1017 {"gl_BackMaterial", "emission",
1018 {STATE_MATERIAL, 1, STATE_EMISSION}, SWIZZLE_XYZW},
1019 {"gl_BackMaterial", "ambient",
1020 {STATE_MATERIAL, 1, STATE_AMBIENT}, SWIZZLE_XYZW},
1021 {"gl_BackMaterial", "diffuse",
1022 {STATE_MATERIAL, 1, STATE_DIFFUSE}, SWIZZLE_XYZW},
1023 {"gl_BackMaterial", "specular",
1024 {STATE_MATERIAL, 1, STATE_SPECULAR}, SWIZZLE_XYZW},
1025 {"gl_BackMaterial", "shininess",
1026 {STATE_MATERIAL, 1, STATE_SHININESS}, SWIZZLE_XXXX},
1027
1028 {"gl_LightSource", "ambient",
1029 {STATE_LIGHT, 0, STATE_AMBIENT}, SWIZZLE_XYZW, true},
1030 {"gl_LightSource", "diffuse",
1031 {STATE_LIGHT, 0, STATE_DIFFUSE}, SWIZZLE_XYZW, true},
1032 {"gl_LightSource", "specular",
1033 {STATE_LIGHT, 0, STATE_SPECULAR}, SWIZZLE_XYZW, true},
1034 {"gl_LightSource", "position",
1035 {STATE_LIGHT, 0, STATE_POSITION}, SWIZZLE_XYZW, true},
1036 {"gl_LightSource", "halfVector",
1037 {STATE_LIGHT, 0, STATE_HALF_VECTOR}, SWIZZLE_XYZW, true},
1038 {"gl_LightSource", "spotDirection",
1039 {STATE_LIGHT, 0, STATE_SPOT_DIRECTION}, SWIZZLE_XYZW, true},
1040 {"gl_LightSource", "spotCosCutoff",
1041 {STATE_LIGHT, 0, STATE_SPOT_DIRECTION}, SWIZZLE_WWWW, true},
1042 {"gl_LightSource", "spotCutoff",
1043 {STATE_LIGHT, 0, STATE_SPOT_CUTOFF}, SWIZZLE_XXXX, true},
1044 {"gl_LightSource", "spotExponent",
1045 {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_WWWW, true},
1046 {"gl_LightSource", "constantAttenuation",
1047 {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_XXXX, true},
1048 {"gl_LightSource", "linearAttenuation",
1049 {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_YYYY, true},
1050 {"gl_LightSource", "quadraticAttenuation",
1051 {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_ZZZZ, true},
1052
1053 {"gl_LightModel", NULL,
1054 {STATE_LIGHTMODEL_AMBIENT, 0}, SWIZZLE_XYZW},
1055
1056 {"gl_FrontLightModelProduct", NULL,
1057 {STATE_LIGHTMODEL_SCENECOLOR, 0}, SWIZZLE_XYZW},
1058 {"gl_BackLightModelProduct", NULL,
1059 {STATE_LIGHTMODEL_SCENECOLOR, 1}, SWIZZLE_XYZW},
1060
1061 {"gl_FrontLightProduct", "ambient",
1062 {STATE_LIGHTPROD, 0, 0, STATE_AMBIENT}, SWIZZLE_XYZW, true},
1063 {"gl_FrontLightProduct", "diffuse",
1064 {STATE_LIGHTPROD, 0, 0, STATE_DIFFUSE}, SWIZZLE_XYZW, true},
1065 {"gl_FrontLightProduct", "specular",
1066 {STATE_LIGHTPROD, 0, 0, STATE_SPECULAR}, SWIZZLE_XYZW, true},
1067
1068 {"gl_BackLightProduct", "ambient",
1069 {STATE_LIGHTPROD, 0, 1, STATE_AMBIENT}, SWIZZLE_XYZW, true},
1070 {"gl_BackLightProduct", "diffuse",
1071 {STATE_LIGHTPROD, 0, 1, STATE_DIFFUSE}, SWIZZLE_XYZW, true},
1072 {"gl_BackLightProduct", "specular",
1073 {STATE_LIGHTPROD, 0, 1, STATE_SPECULAR}, SWIZZLE_XYZW, true},
1074
1075 {"gl_TextureEnvColor", "ambient",
1076 {STATE_TEXENV_COLOR, 0}, SWIZZLE_XYZW, true},
1077
1078 {"gl_EyePlaneS", NULL,
1079 {STATE_TEXGEN, 0, STATE_TEXGEN_EYE_S}, SWIZZLE_XYZW, true},
1080 {"gl_EyePlaneT", NULL,
1081 {STATE_TEXGEN, 0, STATE_TEXGEN_EYE_T}, SWIZZLE_XYZW, true},
1082 {"gl_EyePlaneR", NULL,
1083 {STATE_TEXGEN, 0, STATE_TEXGEN_EYE_R}, SWIZZLE_XYZW, true},
1084 {"gl_EyePlaneQ", NULL,
1085 {STATE_TEXGEN, 0, STATE_TEXGEN_EYE_Q}, SWIZZLE_XYZW, true},
1086
1087 {"gl_ObjectPlaneS", NULL,
1088 {STATE_TEXGEN, 0, STATE_TEXGEN_OBJECT_S}, SWIZZLE_XYZW, true},
1089 {"gl_ObjectPlaneT", NULL,
1090 {STATE_TEXGEN, 0, STATE_TEXGEN_OBJECT_T}, SWIZZLE_XYZW, true},
1091 {"gl_ObjectPlaneR", NULL,
1092 {STATE_TEXGEN, 0, STATE_TEXGEN_OBJECT_R}, SWIZZLE_XYZW, true},
1093 {"gl_ObjectPlaneQ", NULL,
1094 {STATE_TEXGEN, 0, STATE_TEXGEN_OBJECT_Q}, SWIZZLE_XYZW, true},
1095
1096 {"gl_Fog", "color",
1097 {STATE_FOG_COLOR}, SWIZZLE_XYZW},
1098 {"gl_Fog", "density",
1099 {STATE_FOG_PARAMS}, SWIZZLE_XXXX},
1100 {"gl_Fog", "start",
1101 {STATE_FOG_PARAMS}, SWIZZLE_YYYY},
1102 {"gl_Fog", "end",
1103 {STATE_FOG_PARAMS}, SWIZZLE_ZZZZ},
1104 {"gl_Fog", "scale",
1105 {STATE_FOG_PARAMS}, SWIZZLE_WWWW},
1106 };
1107
1108 static ir_to_mesa_src_reg
1109 get_builtin_uniform_reg(struct gl_program *prog,
1110 const char *name, int array_index, const char *field)
1111 {
1112 unsigned int i;
1113 ir_to_mesa_src_reg src_reg;
1114 int tokens[STATE_LENGTH];
1115
1116 for (i = 0; i < Elements(statevars); i++) {
1117 if (strcmp(statevars[i].name, name) != 0)
1118 continue;
1119 if (!field && statevars[i].field) {
1120 assert(!"FINISHME: whole-structure state var dereference");
1121 }
1122 if (field && strcmp(statevars[i].field, field) != 0)
1123 continue;
1124 break;
1125 }
1126
1127 if (i == Elements(statevars)) {
1128 printf("builtin uniform %s%s%s not found\n",
1129 name,
1130 field ? "." : "",
1131 field ? field : "");
1132 abort();
1133 }
1134
1135 memcpy(&tokens, statevars[i].tokens, sizeof(tokens));
1136 if (statevars[i].array_indexed)
1137 tokens[1] = array_index;
1138
1139 src_reg.file = PROGRAM_STATE_VAR;
1140 src_reg.index = _mesa_add_state_reference(prog->Parameters,
1141 (gl_state_index *)tokens);
1142 src_reg.swizzle = statevars[i].swizzle;
1143 src_reg.negate = 0;
1144 src_reg.reladdr = false;
1145
1146 return src_reg;
1147 }
1148
1149 static int
1150 add_matrix_ref(struct gl_program *prog, int *tokens)
1151 {
1152 int base_pos = -1;
1153 int i;
1154
1155 /* Add a ref for each column. It looks like the reason we do
1156 * it this way is that _mesa_add_state_reference doesn't work
1157 * for things that aren't vec4s, so the tokens[2]/tokens[3]
1158 * range has to be equal.
1159 */
1160 for (i = 0; i < 4; i++) {
1161 tokens[2] = i;
1162 tokens[3] = i;
1163 int pos = _mesa_add_state_reference(prog->Parameters,
1164 (gl_state_index *)tokens);
1165 if (base_pos == -1)
1166 base_pos = pos;
1167 else
1168 assert(base_pos + i == pos);
1169 }
1170
1171 return base_pos;
1172 }
1173
1174 static variable_storage *
1175 get_builtin_matrix_ref(void *mem_ctx, struct gl_program *prog, ir_variable *var,
1176 ir_rvalue *array_index)
1177 {
1178 /*
1179 * NOTE: The ARB_vertex_program extension specified that matrices get
1180 * loaded in registers in row-major order. With GLSL, we want column-
1181 * major order. So, we need to transpose all matrices here...
1182 */
1183 static const struct {
1184 const char *name;
1185 int matrix;
1186 int modifier;
1187 } matrices[] = {
1188 { "gl_ModelViewMatrix", STATE_MODELVIEW_MATRIX, STATE_MATRIX_TRANSPOSE },
1189 { "gl_ModelViewMatrixInverse", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVTRANS },
1190 { "gl_ModelViewMatrixTranspose", STATE_MODELVIEW_MATRIX, 0 },
1191 { "gl_ModelViewMatrixInverseTranspose", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVERSE },
1192
1193 { "gl_ProjectionMatrix", STATE_PROJECTION_MATRIX, STATE_MATRIX_TRANSPOSE },
1194 { "gl_ProjectionMatrixInverse", STATE_PROJECTION_MATRIX, STATE_MATRIX_INVTRANS },
1195 { "gl_ProjectionMatrixTranspose", STATE_PROJECTION_MATRIX, 0 },
1196 { "gl_ProjectionMatrixInverseTranspose", STATE_PROJECTION_MATRIX, STATE_MATRIX_INVERSE },
1197
1198 { "gl_ModelViewProjectionMatrix", STATE_MVP_MATRIX, STATE_MATRIX_TRANSPOSE },
1199 { "gl_ModelViewProjectionMatrixInverse", STATE_MVP_MATRIX, STATE_MATRIX_INVTRANS },
1200 { "gl_ModelViewProjectionMatrixTranspose", STATE_MVP_MATRIX, 0 },
1201 { "gl_ModelViewProjectionMatrixInverseTranspose", STATE_MVP_MATRIX, STATE_MATRIX_INVERSE },
1202
1203 { "gl_TextureMatrix", STATE_TEXTURE_MATRIX, STATE_MATRIX_TRANSPOSE },
1204 { "gl_TextureMatrixInverse", STATE_TEXTURE_MATRIX, STATE_MATRIX_INVTRANS },
1205 { "gl_TextureMatrixTranspose", STATE_TEXTURE_MATRIX, 0 },
1206 { "gl_TextureMatrixInverseTranspose", STATE_TEXTURE_MATRIX, STATE_MATRIX_INVERSE },
1207
1208 { "gl_NormalMatrix", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVERSE },
1209
1210 };
1211 unsigned int i;
1212 variable_storage *entry;
1213
1214 /* C++ gets angry when we try to use an int as a gl_state_index, so we use
1215 * ints for gl_state_index. Make sure they're compatible.
1216 */
1217 assert(sizeof(gl_state_index) == sizeof(int));
1218
1219 for (i = 0; i < Elements(matrices); i++) {
1220 if (strcmp(var->name, matrices[i].name) == 0) {
1221 int tokens[STATE_LENGTH];
1222 int base_pos = -1;
1223
1224 tokens[0] = matrices[i].matrix;
1225 tokens[4] = matrices[i].modifier;
1226 if (matrices[i].matrix == STATE_TEXTURE_MATRIX) {
1227 ir_constant *index = array_index->constant_expression_value();
1228 if (index) {
1229 tokens[1] = index->value.i[0];
1230 base_pos = add_matrix_ref(prog, tokens);
1231 } else {
1232 for (i = 0; i < var->type->length; i++) {
1233 tokens[1] = i;
1234 int pos = add_matrix_ref(prog, tokens);
1235 if (base_pos == -1)
1236 base_pos = pos;
1237 else
1238 assert(base_pos + (int)i * 4 == pos);
1239 }
1240 }
1241 } else {
1242 tokens[1] = 0; /* unused array index */
1243 base_pos = add_matrix_ref(prog, tokens);
1244 }
1245 tokens[4] = matrices[i].modifier;
1246
1247 entry = new(mem_ctx) variable_storage(var,
1248 PROGRAM_STATE_VAR,
1249 base_pos);
1250
1251 return entry;
1252 }
1253 }
1254
1255 return NULL;
1256 }
1257
1258 int
1259 ir_to_mesa_visitor::add_uniform(const char *name,
1260 const glsl_type *type,
1261 ir_constant *constant)
1262 {
1263 int len;
1264
1265 if (type->is_vector() ||
1266 type->is_scalar()) {
1267 len = type->vector_elements;
1268 } else {
1269 len = type_size(type) * 4;
1270 }
1271
1272 float *values = NULL;
1273 if (constant && type->is_array()) {
1274 values = (float *)malloc(type->length * 4 * sizeof(float));
1275
1276 assert(type->fields.array->is_scalar() ||
1277 type->fields.array->is_vector() ||
1278 !"FINISHME: uniform array initializers for non-vector");
1279
1280 for (unsigned int i = 0; i < type->length; i++) {
1281 ir_constant *element = constant->array_elements[i];
1282 unsigned int c;
1283
1284 for (c = 0; c < type->fields.array->vector_elements; c++) {
1285 switch (type->fields.array->base_type) {
1286 case GLSL_TYPE_FLOAT:
1287 values[4 * i + c] = element->value.f[c];
1288 break;
1289 case GLSL_TYPE_INT:
1290 values[4 * i + c] = element->value.i[c];
1291 break;
1292 case GLSL_TYPE_UINT:
1293 values[4 * i + c] = element->value.u[c];
1294 break;
1295 case GLSL_TYPE_BOOL:
1296 values[4 * i + c] = element->value.b[c];
1297 break;
1298 default:
1299 assert(!"not reached");
1300 }
1301 }
1302 }
1303 } else if (constant) {
1304 values = (float *)malloc(16 * sizeof(float));
1305 for (unsigned int i = 0; i < type->components(); i++) {
1306 switch (type->base_type) {
1307 case GLSL_TYPE_FLOAT:
1308 values[i] = constant->value.f[i];
1309 break;
1310 case GLSL_TYPE_INT:
1311 values[i] = constant->value.i[i];
1312 break;
1313 case GLSL_TYPE_UINT:
1314 values[i] = constant->value.u[i];
1315 break;
1316 case GLSL_TYPE_BOOL:
1317 values[i] = constant->value.b[i];
1318 break;
1319 default:
1320 assert(!"not reached");
1321 }
1322 }
1323 }
1324
1325 int loc = _mesa_add_uniform(this->prog->Parameters,
1326 name,
1327 len,
1328 type->gl_type,
1329 values);
1330 free(values);
1331
1332 return loc;
1333 }
1334
1335 /* Recursively add all the members of the aggregate uniform as uniform names
1336 * to Mesa, moving those uniforms to our structured temporary.
1337 */
1338 void
1339 ir_to_mesa_visitor::add_aggregate_uniform(ir_instruction *ir,
1340 const char *name,
1341 const struct glsl_type *type,
1342 ir_constant *constant,
1343 struct ir_to_mesa_dst_reg temp)
1344 {
1345 int loc;
1346
1347 if (type->is_record()) {
1348 void *mem_ctx = talloc_new(NULL);
1349 ir_constant *field_constant = NULL;
1350
1351 if (constant)
1352 field_constant = (ir_constant *)constant->components.get_head();
1353
1354 for (unsigned int i = 0; i < type->length; i++) {
1355 const glsl_type *field_type = type->fields.structure[i].type;
1356
1357 add_aggregate_uniform(ir,
1358 talloc_asprintf(mem_ctx, "%s.%s", name,
1359 type->fields.structure[i].name),
1360 field_type, field_constant, temp);
1361 temp.index += type_size(field_type);
1362
1363 if (constant)
1364 field_constant = (ir_constant *)field_constant->next;
1365 }
1366
1367 talloc_free(mem_ctx);
1368
1369 return;
1370 }
1371
1372 assert(type->is_vector() || type->is_scalar() || !"FINISHME: other types");
1373
1374 loc = add_uniform(name, type, constant);
1375
1376 ir_to_mesa_src_reg uniform(PROGRAM_UNIFORM, loc, type);
1377
1378 for (int i = 0; i < type_size(type); i++) {
1379 ir_to_mesa_emit_op1(ir, OPCODE_MOV, temp, uniform);
1380 temp.index++;
1381 uniform.index++;
1382 }
1383 }
1384
1385
1386 void
1387 ir_to_mesa_visitor::visit(ir_dereference_variable *ir)
1388 {
1389 variable_storage *entry = find_variable_storage(ir->var);
1390 unsigned int loc;
1391
1392 if (!entry) {
1393 switch (ir->var->mode) {
1394 case ir_var_uniform:
1395 entry = get_builtin_matrix_ref(this->mem_ctx, this->prog, ir->var,
1396 NULL);
1397 if (entry)
1398 break;
1399
1400 /* FINISHME: Fix up uniform name for arrays and things */
1401 if (ir->var->type->base_type == GLSL_TYPE_SAMPLER ||
1402 (ir->var->type->base_type == GLSL_TYPE_ARRAY &&
1403 ir->var->type->fields.array->base_type == GLSL_TYPE_SAMPLER)) {
1404 int array_length;
1405
1406 if (ir->var->type->base_type == GLSL_TYPE_ARRAY)
1407 array_length = ir->var->type->length;
1408 else
1409 array_length = 1;
1410 int sampler = _mesa_add_sampler(this->prog->Parameters,
1411 ir->var->name,
1412 ir->var->type->gl_type,
1413 array_length);
1414 set_sampler_location(ir->var, sampler);
1415
1416 entry = new(mem_ctx) variable_storage(ir->var, PROGRAM_SAMPLER,
1417 sampler);
1418 this->variables.push_tail(entry);
1419 break;
1420 }
1421
1422 assert(ir->var->type->gl_type != 0 &&
1423 ir->var->type->gl_type != GL_INVALID_ENUM);
1424
1425 /* Oh, the joy of aggregate types in Mesa. Like constants,
1426 * we can only really do vec4s. So, make a temp, chop the
1427 * aggregate up into vec4s, and move those vec4s to the temp.
1428 */
1429 if (ir->var->type->is_record()) {
1430 ir_to_mesa_src_reg temp = get_temp(ir->var->type);
1431
1432 entry = new(mem_ctx) variable_storage(ir->var,
1433 temp.file,
1434 temp.index);
1435 this->variables.push_tail(entry);
1436
1437 add_aggregate_uniform(ir->var, ir->var->name, ir->var->type,
1438 ir->var->constant_value,
1439 ir_to_mesa_dst_reg_from_src(temp));
1440 break;
1441 }
1442
1443 loc = add_uniform(ir->var->name,
1444 ir->var->type,
1445 ir->var->constant_value);
1446
1447 /* Always mark the uniform used at this point. If it isn't
1448 * used, dead code elimination should have nuked the decl already.
1449 */
1450 this->prog->Parameters->Parameters[loc].Used = GL_TRUE;
1451
1452 entry = new(mem_ctx) variable_storage(ir->var, PROGRAM_UNIFORM, loc);
1453 this->variables.push_tail(entry);
1454 break;
1455 case ir_var_in:
1456 case ir_var_out:
1457 case ir_var_inout:
1458 /* The linker assigns locations for varyings and attributes,
1459 * including deprecated builtins (like gl_Color), user-assign
1460 * generic attributes (glBindVertexLocation), and
1461 * user-defined varyings.
1462 *
1463 * FINISHME: We would hit this path for function arguments. Fix!
1464 */
1465 assert(ir->var->location != -1);
1466 if (ir->var->mode == ir_var_in ||
1467 ir->var->mode == ir_var_inout) {
1468 entry = new(mem_ctx) variable_storage(ir->var,
1469 PROGRAM_INPUT,
1470 ir->var->location);
1471
1472 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
1473 ir->var->location >= VERT_ATTRIB_GENERIC0) {
1474 _mesa_add_attribute(prog->Attributes,
1475 ir->var->name,
1476 _mesa_sizeof_glsl_type(ir->var->type->gl_type),
1477 ir->var->type->gl_type,
1478 ir->var->location - VERT_ATTRIB_GENERIC0);
1479 }
1480 } else {
1481 entry = new(mem_ctx) variable_storage(ir->var,
1482 PROGRAM_OUTPUT,
1483 ir->var->location);
1484 }
1485
1486 break;
1487 case ir_var_auto:
1488 case ir_var_temporary:
1489 entry = new(mem_ctx) variable_storage(ir->var, PROGRAM_TEMPORARY,
1490 this->next_temp);
1491 this->variables.push_tail(entry);
1492
1493 next_temp += type_size(ir->var->type);
1494 break;
1495 }
1496
1497 if (!entry) {
1498 printf("Failed to make storage for %s\n", ir->var->name);
1499 exit(1);
1500 }
1501 }
1502
1503 this->result = ir_to_mesa_src_reg(entry->file, entry->index, ir->var->type);
1504 }
1505
1506 void
1507 ir_to_mesa_visitor::visit(ir_dereference_array *ir)
1508 {
1509 ir_variable *var = ir->variable_referenced();
1510 ir_constant *index;
1511 ir_to_mesa_src_reg src_reg;
1512 ir_dereference_variable *deref_var = ir->array->as_dereference_variable();
1513 int element_size = type_size(ir->type);
1514
1515 index = ir->array_index->constant_expression_value();
1516
1517 if (deref_var && strncmp(deref_var->var->name,
1518 "gl_TextureMatrix",
1519 strlen("gl_TextureMatrix")) == 0) {
1520 struct variable_storage *entry;
1521
1522 entry = get_builtin_matrix_ref(this->mem_ctx, this->prog, deref_var->var,
1523 ir->array_index);
1524 assert(entry);
1525
1526 ir_to_mesa_src_reg src_reg(entry->file, entry->index, ir->type);
1527
1528 if (index) {
1529 src_reg.reladdr = NULL;
1530 } else {
1531 ir_to_mesa_src_reg index_reg = get_temp(glsl_type::float_type);
1532
1533 ir->array_index->accept(this);
1534 ir_to_mesa_emit_op2(ir, OPCODE_MUL,
1535 ir_to_mesa_dst_reg_from_src(index_reg),
1536 this->result, src_reg_for_float(element_size));
1537
1538 src_reg.reladdr = talloc(mem_ctx, ir_to_mesa_src_reg);
1539 memcpy(src_reg.reladdr, &index_reg, sizeof(index_reg));
1540 }
1541
1542 this->result = src_reg;
1543 return;
1544 }
1545
1546 if (strncmp(var->name, "gl_", 3) == 0 && var->mode == ir_var_uniform &&
1547 !var->type->is_matrix()) {
1548 ir_dereference_record *record = NULL;
1549 if (ir->array->ir_type == ir_type_dereference_record)
1550 record = (ir_dereference_record *)ir->array;
1551
1552 assert(index || !"FINISHME: variable-indexed builtin uniform access");
1553
1554 this->result = get_builtin_uniform_reg(prog,
1555 var->name,
1556 index->value.i[0],
1557 record ? record->field : NULL);
1558 }
1559
1560 ir->array->accept(this);
1561 src_reg = this->result;
1562
1563 if (index) {
1564 src_reg.index += index->value.i[0] * element_size;
1565 } else {
1566 ir_to_mesa_src_reg array_base = this->result;
1567 /* Variable index array dereference. It eats the "vec4" of the
1568 * base of the array and an index that offsets the Mesa register
1569 * index.
1570 */
1571 ir->array_index->accept(this);
1572
1573 ir_to_mesa_src_reg index_reg;
1574
1575 if (element_size == 1) {
1576 index_reg = this->result;
1577 } else {
1578 index_reg = get_temp(glsl_type::float_type);
1579
1580 ir_to_mesa_emit_op2(ir, OPCODE_MUL,
1581 ir_to_mesa_dst_reg_from_src(index_reg),
1582 this->result, src_reg_for_float(element_size));
1583 }
1584
1585 src_reg.reladdr = talloc(mem_ctx, ir_to_mesa_src_reg);
1586 memcpy(src_reg.reladdr, &index_reg, sizeof(index_reg));
1587 }
1588
1589 /* If the type is smaller than a vec4, replicate the last channel out. */
1590 if (ir->type->is_scalar() || ir->type->is_vector())
1591 src_reg.swizzle = swizzle_for_size(ir->type->vector_elements);
1592 else
1593 src_reg.swizzle = SWIZZLE_NOOP;
1594
1595 this->result = src_reg;
1596 }
1597
1598 void
1599 ir_to_mesa_visitor::visit(ir_dereference_record *ir)
1600 {
1601 unsigned int i;
1602 const glsl_type *struct_type = ir->record->type;
1603 int offset = 0;
1604 ir_variable *var = ir->record->variable_referenced();
1605
1606 if (strncmp(var->name, "gl_", 3) == 0 && var->mode == ir_var_uniform) {
1607 assert(var);
1608
1609 this->result = get_builtin_uniform_reg(prog,
1610 var->name,
1611 0,
1612 ir->field);
1613 return;
1614 }
1615
1616 ir->record->accept(this);
1617
1618 for (i = 0; i < struct_type->length; i++) {
1619 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1620 break;
1621 offset += type_size(struct_type->fields.structure[i].type);
1622 }
1623 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1624 this->result.index += offset;
1625 }
1626
1627 /**
1628 * We want to be careful in assignment setup to hit the actual storage
1629 * instead of potentially using a temporary like we might with the
1630 * ir_dereference handler.
1631 */
1632 static struct ir_to_mesa_dst_reg
1633 get_assignment_lhs(ir_dereference *ir, ir_to_mesa_visitor *v,
1634 ir_to_mesa_src_reg *r)
1635 {
1636 /* The LHS must be a dereference. If the LHS is a variable indexed array
1637 * access of a vector, it must be separated into a series conditional moves
1638 * before reaching this point (see ir_vec_index_to_cond_assign).
1639 */
1640 assert(ir->as_dereference());
1641 ir_dereference_array *deref_array = ir->as_dereference_array();
1642 if (deref_array) {
1643 assert(!deref_array->array->type->is_vector());
1644 }
1645
1646 /* Use the rvalue deref handler for the most part. We'll ignore
1647 * swizzles in it and write swizzles using writemask, though.
1648 */
1649 ir->accept(v);
1650 return ir_to_mesa_dst_reg_from_src(v->result);
1651 }
1652
1653 void
1654 ir_to_mesa_visitor::visit(ir_assignment *ir)
1655 {
1656 struct ir_to_mesa_dst_reg l;
1657 struct ir_to_mesa_src_reg r;
1658 int i;
1659
1660 ir->rhs->accept(this);
1661 r = this->result;
1662
1663 l = get_assignment_lhs(ir->lhs, this, &r);
1664
1665 /* FINISHME: This should really set to the correct maximal writemask for each
1666 * FINISHME: component written (in the loops below). This case can only
1667 * FINISHME: occur for matrices, arrays, and structures.
1668 */
1669 if (ir->write_mask == 0) {
1670 assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
1671 l.writemask = WRITEMASK_XYZW;
1672 } else if (ir->lhs->type->is_scalar()) {
1673 /* FINISHME: This hack makes writing to gl_FragData, which lives in the
1674 * FINISHME: W component of fragment shader output zero, work correctly.
1675 */
1676 l.writemask = WRITEMASK_XYZW;
1677 } else {
1678 assert(ir->lhs->type->is_vector());
1679 l.writemask = ir->write_mask;
1680 }
1681
1682 assert(l.file != PROGRAM_UNDEFINED);
1683 assert(r.file != PROGRAM_UNDEFINED);
1684
1685 if (ir->condition) {
1686 ir_to_mesa_src_reg condition;
1687
1688 ir->condition->accept(this);
1689 condition = this->result;
1690
1691 /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves,
1692 * and the condition we produced is 0.0 or 1.0. By flipping the
1693 * sign, we can choose which value OPCODE_CMP produces without
1694 * an extra computing the condition.
1695 */
1696 condition.negate = ~condition.negate;
1697 for (i = 0; i < type_size(ir->lhs->type); i++) {
1698 ir_to_mesa_emit_op3(ir, OPCODE_CMP, l,
1699 condition, r, ir_to_mesa_src_reg_from_dst(l));
1700 l.index++;
1701 r.index++;
1702 }
1703 } else {
1704 for (i = 0; i < type_size(ir->lhs->type); i++) {
1705 ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r);
1706 l.index++;
1707 r.index++;
1708 }
1709 }
1710 }
1711
1712
1713 void
1714 ir_to_mesa_visitor::visit(ir_constant *ir)
1715 {
1716 ir_to_mesa_src_reg src_reg;
1717 GLfloat stack_vals[4];
1718 GLfloat *values = stack_vals;
1719 unsigned int i;
1720
1721 /* Unfortunately, 4 floats is all we can get into
1722 * _mesa_add_unnamed_constant. So, make a temp to store an
1723 * aggregate constant and move each constant value into it. If we
1724 * get lucky, copy propagation will eliminate the extra moves.
1725 */
1726
1727 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1728 ir_to_mesa_src_reg temp_base = get_temp(ir->type);
1729 ir_to_mesa_dst_reg temp = ir_to_mesa_dst_reg_from_src(temp_base);
1730
1731 foreach_iter(exec_list_iterator, iter, ir->components) {
1732 ir_constant *field_value = (ir_constant *)iter.get();
1733 int size = type_size(field_value->type);
1734
1735 assert(size > 0);
1736
1737 field_value->accept(this);
1738 src_reg = this->result;
1739
1740 for (i = 0; i < (unsigned int)size; i++) {
1741 ir_to_mesa_emit_op1(ir, OPCODE_MOV, temp, src_reg);
1742
1743 src_reg.index++;
1744 temp.index++;
1745 }
1746 }
1747 this->result = temp_base;
1748 return;
1749 }
1750
1751 if (ir->type->is_array()) {
1752 ir_to_mesa_src_reg temp_base = get_temp(ir->type);
1753 ir_to_mesa_dst_reg temp = ir_to_mesa_dst_reg_from_src(temp_base);
1754 int size = type_size(ir->type->fields.array);
1755
1756 assert(size > 0);
1757
1758 for (i = 0; i < ir->type->length; i++) {
1759 ir->array_elements[i]->accept(this);
1760 src_reg = this->result;
1761 for (int j = 0; j < size; j++) {
1762 ir_to_mesa_emit_op1(ir, OPCODE_MOV, temp, src_reg);
1763
1764 src_reg.index++;
1765 temp.index++;
1766 }
1767 }
1768 this->result = temp_base;
1769 return;
1770 }
1771
1772 if (ir->type->is_matrix()) {
1773 ir_to_mesa_src_reg mat = get_temp(ir->type);
1774 ir_to_mesa_dst_reg mat_column = ir_to_mesa_dst_reg_from_src(mat);
1775
1776 for (i = 0; i < ir->type->matrix_columns; i++) {
1777 assert(ir->type->base_type == GLSL_TYPE_FLOAT);
1778 values = &ir->value.f[i * ir->type->vector_elements];
1779
1780 src_reg = ir_to_mesa_src_reg(PROGRAM_CONSTANT, -1, NULL);
1781 src_reg.index = _mesa_add_unnamed_constant(this->prog->Parameters,
1782 values,
1783 ir->type->vector_elements,
1784 &src_reg.swizzle);
1785 ir_to_mesa_emit_op1(ir, OPCODE_MOV, mat_column, src_reg);
1786
1787 mat_column.index++;
1788 }
1789
1790 this->result = mat;
1791 }
1792
1793 src_reg.file = PROGRAM_CONSTANT;
1794 switch (ir->type->base_type) {
1795 case GLSL_TYPE_FLOAT:
1796 values = &ir->value.f[0];
1797 break;
1798 case GLSL_TYPE_UINT:
1799 for (i = 0; i < ir->type->vector_elements; i++) {
1800 values[i] = ir->value.u[i];
1801 }
1802 break;
1803 case GLSL_TYPE_INT:
1804 for (i = 0; i < ir->type->vector_elements; i++) {
1805 values[i] = ir->value.i[i];
1806 }
1807 break;
1808 case GLSL_TYPE_BOOL:
1809 for (i = 0; i < ir->type->vector_elements; i++) {
1810 values[i] = ir->value.b[i];
1811 }
1812 break;
1813 default:
1814 assert(!"Non-float/uint/int/bool constant");
1815 }
1816
1817 this->result = ir_to_mesa_src_reg(PROGRAM_CONSTANT, -1, ir->type);
1818 this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters,
1819 values,
1820 ir->type->vector_elements,
1821 &this->result.swizzle);
1822 }
1823
1824 function_entry *
1825 ir_to_mesa_visitor::get_function_signature(ir_function_signature *sig)
1826 {
1827 function_entry *entry;
1828
1829 foreach_iter(exec_list_iterator, iter, this->function_signatures) {
1830 entry = (function_entry *)iter.get();
1831
1832 if (entry->sig == sig)
1833 return entry;
1834 }
1835
1836 entry = talloc(mem_ctx, function_entry);
1837 entry->sig = sig;
1838 entry->sig_id = this->next_signature_id++;
1839 entry->bgn_inst = NULL;
1840
1841 /* Allocate storage for all the parameters. */
1842 foreach_iter(exec_list_iterator, iter, sig->parameters) {
1843 ir_variable *param = (ir_variable *)iter.get();
1844 variable_storage *storage;
1845
1846 storage = find_variable_storage(param);
1847 assert(!storage);
1848
1849 storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY,
1850 this->next_temp);
1851 this->variables.push_tail(storage);
1852
1853 this->next_temp += type_size(param->type);
1854 }
1855
1856 if (!sig->return_type->is_void()) {
1857 entry->return_reg = get_temp(sig->return_type);
1858 } else {
1859 entry->return_reg = ir_to_mesa_undef;
1860 }
1861
1862 this->function_signatures.push_tail(entry);
1863 return entry;
1864 }
1865
1866 void
1867 ir_to_mesa_visitor::visit(ir_call *ir)
1868 {
1869 ir_to_mesa_instruction *call_inst;
1870 ir_function_signature *sig = ir->get_callee();
1871 function_entry *entry = get_function_signature(sig);
1872 int i;
1873
1874 /* Process in parameters. */
1875 exec_list_iterator sig_iter = sig->parameters.iterator();
1876 foreach_iter(exec_list_iterator, iter, *ir) {
1877 ir_rvalue *param_rval = (ir_rvalue *)iter.get();
1878 ir_variable *param = (ir_variable *)sig_iter.get();
1879
1880 if (param->mode == ir_var_in ||
1881 param->mode == ir_var_inout) {
1882 variable_storage *storage = find_variable_storage(param);
1883 assert(storage);
1884
1885 param_rval->accept(this);
1886 ir_to_mesa_src_reg r = this->result;
1887
1888 ir_to_mesa_dst_reg l;
1889 l.file = storage->file;
1890 l.index = storage->index;
1891 l.reladdr = NULL;
1892 l.writemask = WRITEMASK_XYZW;
1893 l.cond_mask = COND_TR;
1894
1895 for (i = 0; i < type_size(param->type); i++) {
1896 ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r);
1897 l.index++;
1898 r.index++;
1899 }
1900 }
1901
1902 sig_iter.next();
1903 }
1904 assert(!sig_iter.has_next());
1905
1906 /* Emit call instruction */
1907 call_inst = ir_to_mesa_emit_op1(ir, OPCODE_CAL,
1908 ir_to_mesa_undef_dst, ir_to_mesa_undef);
1909 call_inst->function = entry;
1910
1911 /* Process out parameters. */
1912 sig_iter = sig->parameters.iterator();
1913 foreach_iter(exec_list_iterator, iter, *ir) {
1914 ir_rvalue *param_rval = (ir_rvalue *)iter.get();
1915 ir_variable *param = (ir_variable *)sig_iter.get();
1916
1917 if (param->mode == ir_var_out ||
1918 param->mode == ir_var_inout) {
1919 variable_storage *storage = find_variable_storage(param);
1920 assert(storage);
1921
1922 ir_to_mesa_src_reg r;
1923 r.file = storage->file;
1924 r.index = storage->index;
1925 r.reladdr = NULL;
1926 r.swizzle = SWIZZLE_NOOP;
1927 r.negate = 0;
1928
1929 param_rval->accept(this);
1930 ir_to_mesa_dst_reg l = ir_to_mesa_dst_reg_from_src(this->result);
1931
1932 for (i = 0; i < type_size(param->type); i++) {
1933 ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r);
1934 l.index++;
1935 r.index++;
1936 }
1937 }
1938
1939 sig_iter.next();
1940 }
1941 assert(!sig_iter.has_next());
1942
1943 /* Process return value. */
1944 this->result = entry->return_reg;
1945 }
1946
1947
1948 void
1949 ir_to_mesa_visitor::visit(ir_texture *ir)
1950 {
1951 ir_to_mesa_src_reg result_src, coord, lod_info, projector;
1952 ir_to_mesa_dst_reg result_dst, coord_dst;
1953 ir_to_mesa_instruction *inst = NULL;
1954 prog_opcode opcode = OPCODE_NOP;
1955
1956 ir->coordinate->accept(this);
1957
1958 /* Put our coords in a temp. We'll need to modify them for shadow,
1959 * projection, or LOD, so the only case we'd use it as is is if
1960 * we're doing plain old texturing. Mesa IR optimization should
1961 * handle cleaning up our mess in that case.
1962 */
1963 coord = get_temp(glsl_type::vec4_type);
1964 coord_dst = ir_to_mesa_dst_reg_from_src(coord);
1965 ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst,
1966 this->result);
1967
1968 if (ir->projector) {
1969 ir->projector->accept(this);
1970 projector = this->result;
1971 }
1972
1973 /* Storage for our result. Ideally for an assignment we'd be using
1974 * the actual storage for the result here, instead.
1975 */
1976 result_src = get_temp(glsl_type::vec4_type);
1977 result_dst = ir_to_mesa_dst_reg_from_src(result_src);
1978
1979 switch (ir->op) {
1980 case ir_tex:
1981 opcode = OPCODE_TEX;
1982 break;
1983 case ir_txb:
1984 opcode = OPCODE_TXB;
1985 ir->lod_info.bias->accept(this);
1986 lod_info = this->result;
1987 break;
1988 case ir_txl:
1989 opcode = OPCODE_TXL;
1990 ir->lod_info.lod->accept(this);
1991 lod_info = this->result;
1992 break;
1993 case ir_txd:
1994 case ir_txf:
1995 assert(!"GLSL 1.30 features unsupported");
1996 break;
1997 }
1998
1999 if (ir->projector) {
2000 if (opcode == OPCODE_TEX) {
2001 /* Slot the projector in as the last component of the coord. */
2002 coord_dst.writemask = WRITEMASK_W;
2003 ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, projector);
2004 coord_dst.writemask = WRITEMASK_XYZW;
2005 opcode = OPCODE_TXP;
2006 } else {
2007 ir_to_mesa_src_reg coord_w = coord;
2008 coord_w.swizzle = SWIZZLE_WWWW;
2009
2010 /* For the other TEX opcodes there's no projective version
2011 * since the last slot is taken up by lod info. Do the
2012 * projective divide now.
2013 */
2014 coord_dst.writemask = WRITEMASK_W;
2015 ir_to_mesa_emit_op1(ir, OPCODE_RCP, coord_dst, projector);
2016
2017 coord_dst.writemask = WRITEMASK_XYZ;
2018 ir_to_mesa_emit_op2(ir, OPCODE_MUL, coord_dst, coord, coord_w);
2019
2020 coord_dst.writemask = WRITEMASK_XYZW;
2021 coord.swizzle = SWIZZLE_XYZW;
2022 }
2023 }
2024
2025 if (ir->shadow_comparitor) {
2026 /* Slot the shadow value in as the second to last component of the
2027 * coord.
2028 */
2029 ir->shadow_comparitor->accept(this);
2030 coord_dst.writemask = WRITEMASK_Z;
2031 ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, this->result);
2032 coord_dst.writemask = WRITEMASK_XYZW;
2033 }
2034
2035 if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) {
2036 /* Mesa IR stores lod or lod bias in the last channel of the coords. */
2037 coord_dst.writemask = WRITEMASK_W;
2038 ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, lod_info);
2039 coord_dst.writemask = WRITEMASK_XYZW;
2040 }
2041
2042 inst = ir_to_mesa_emit_op1(ir, opcode, result_dst, coord);
2043
2044 if (ir->shadow_comparitor)
2045 inst->tex_shadow = GL_TRUE;
2046
2047 ir_variable *sampler = ir->sampler->variable_referenced();
2048
2049 /* generate the mapping, remove when we generate storage at
2050 * declaration time
2051 */
2052 ir->sampler->accept(this);
2053
2054 inst->sampler = get_sampler_location(sampler);
2055
2056 ir_dereference_array *sampler_array = ir->sampler->as_dereference_array();
2057 if (sampler_array) {
2058 ir_constant *array_index =
2059 sampler_array->array_index->constant_expression_value();
2060
2061 /* GLSL 1.10 and 1.20 allowed variable sampler array indices,
2062 * while GLSL 1.30 requires that the array indices be constant
2063 * integer expressions. We don't expect any driver to actually
2064 * work with a really variable array index, and in 1.20 all that
2065 * would work would be an unrolled loop counter, so assert that
2066 * we ended up with a constant at least..
2067 */
2068 assert(array_index);
2069 inst->sampler += array_index->value.i[0];
2070 }
2071
2072 const glsl_type *sampler_type = sampler->type;
2073 while (sampler_type->base_type == GLSL_TYPE_ARRAY)
2074 sampler_type = sampler_type->fields.array;
2075
2076 switch (sampler_type->sampler_dimensionality) {
2077 case GLSL_SAMPLER_DIM_1D:
2078 inst->tex_target = (sampler_type->sampler_array)
2079 ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
2080 break;
2081 case GLSL_SAMPLER_DIM_2D:
2082 inst->tex_target = (sampler_type->sampler_array)
2083 ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
2084 break;
2085 case GLSL_SAMPLER_DIM_3D:
2086 inst->tex_target = TEXTURE_3D_INDEX;
2087 break;
2088 case GLSL_SAMPLER_DIM_CUBE:
2089 inst->tex_target = TEXTURE_CUBE_INDEX;
2090 break;
2091 default:
2092 assert(!"FINISHME: other texture targets");
2093 }
2094
2095 this->result = result_src;
2096 }
2097
2098 void
2099 ir_to_mesa_visitor::visit(ir_return *ir)
2100 {
2101 assert(current_function);
2102
2103 if (ir->get_value()) {
2104 ir_to_mesa_dst_reg l;
2105 int i;
2106
2107 ir->get_value()->accept(this);
2108 ir_to_mesa_src_reg r = this->result;
2109
2110 l = ir_to_mesa_dst_reg_from_src(current_function->return_reg);
2111
2112 for (i = 0; i < type_size(current_function->sig->return_type); i++) {
2113 ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r);
2114 l.index++;
2115 r.index++;
2116 }
2117 }
2118
2119 ir_to_mesa_emit_op0(ir, OPCODE_RET);
2120 }
2121
2122 void
2123 ir_to_mesa_visitor::visit(ir_discard *ir)
2124 {
2125 assert(ir->condition == NULL); /* FINISHME */
2126
2127 ir_to_mesa_emit_op0(ir, OPCODE_KIL_NV);
2128 }
2129
2130 void
2131 ir_to_mesa_visitor::visit(ir_if *ir)
2132 {
2133 ir_to_mesa_instruction *cond_inst, *if_inst, *else_inst = NULL;
2134 ir_to_mesa_instruction *prev_inst;
2135
2136 prev_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
2137
2138 ir->condition->accept(this);
2139 assert(this->result.file != PROGRAM_UNDEFINED);
2140
2141 if (ctx->Shader.EmitCondCodes) {
2142 cond_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
2143
2144 /* See if we actually generated any instruction for generating
2145 * the condition. If not, then cook up a move to a temp so we
2146 * have something to set cond_update on.
2147 */
2148 if (cond_inst == prev_inst) {
2149 ir_to_mesa_src_reg temp = get_temp(glsl_type::bool_type);
2150 cond_inst = ir_to_mesa_emit_op1(ir->condition, OPCODE_MOV,
2151 ir_to_mesa_dst_reg_from_src(temp),
2152 result);
2153 }
2154 cond_inst->cond_update = GL_TRUE;
2155
2156 if_inst = ir_to_mesa_emit_op0(ir->condition, OPCODE_IF);
2157 if_inst->dst_reg.cond_mask = COND_NE;
2158 } else {
2159 if_inst = ir_to_mesa_emit_op1(ir->condition,
2160 OPCODE_IF, ir_to_mesa_undef_dst,
2161 this->result);
2162 }
2163
2164 this->instructions.push_tail(if_inst);
2165
2166 visit_exec_list(&ir->then_instructions, this);
2167
2168 if (!ir->else_instructions.is_empty()) {
2169 else_inst = ir_to_mesa_emit_op0(ir->condition, OPCODE_ELSE);
2170 visit_exec_list(&ir->else_instructions, this);
2171 }
2172
2173 if_inst = ir_to_mesa_emit_op1(ir->condition, OPCODE_ENDIF,
2174 ir_to_mesa_undef_dst, ir_to_mesa_undef);
2175 }
2176
2177 ir_to_mesa_visitor::ir_to_mesa_visitor()
2178 {
2179 result.file = PROGRAM_UNDEFINED;
2180 next_temp = 1;
2181 next_signature_id = 1;
2182 sampler_map = NULL;
2183 current_function = NULL;
2184 }
2185
2186 ir_to_mesa_visitor::~ir_to_mesa_visitor()
2187 {
2188 if (this->sampler_map)
2189 hash_table_dtor(this->sampler_map);
2190 }
2191
2192 static struct prog_src_register
2193 mesa_src_reg_from_ir_src_reg(ir_to_mesa_src_reg reg)
2194 {
2195 struct prog_src_register mesa_reg;
2196
2197 mesa_reg.File = reg.file;
2198 assert(reg.index < (1 << INST_INDEX_BITS) - 1);
2199 mesa_reg.Index = reg.index;
2200 mesa_reg.Swizzle = reg.swizzle;
2201 mesa_reg.RelAddr = reg.reladdr != NULL;
2202 mesa_reg.Negate = reg.negate;
2203 mesa_reg.Abs = 0;
2204 mesa_reg.HasIndex2 = GL_FALSE;
2205
2206 return mesa_reg;
2207 }
2208
2209 static void
2210 set_branchtargets(ir_to_mesa_visitor *v,
2211 struct prog_instruction *mesa_instructions,
2212 int num_instructions)
2213 {
2214 int if_count = 0, loop_count = 0;
2215 int *if_stack, *loop_stack;
2216 int if_stack_pos = 0, loop_stack_pos = 0;
2217 int i, j;
2218
2219 for (i = 0; i < num_instructions; i++) {
2220 switch (mesa_instructions[i].Opcode) {
2221 case OPCODE_IF:
2222 if_count++;
2223 break;
2224 case OPCODE_BGNLOOP:
2225 loop_count++;
2226 break;
2227 case OPCODE_BRK:
2228 case OPCODE_CONT:
2229 mesa_instructions[i].BranchTarget = -1;
2230 break;
2231 default:
2232 break;
2233 }
2234 }
2235
2236 if_stack = (int *)calloc(if_count, sizeof(*if_stack));
2237 loop_stack = (int *)calloc(loop_count, sizeof(*loop_stack));
2238
2239 for (i = 0; i < num_instructions; i++) {
2240 switch (mesa_instructions[i].Opcode) {
2241 case OPCODE_IF:
2242 if_stack[if_stack_pos] = i;
2243 if_stack_pos++;
2244 break;
2245 case OPCODE_ELSE:
2246 mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
2247 if_stack[if_stack_pos - 1] = i;
2248 break;
2249 case OPCODE_ENDIF:
2250 mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
2251 if_stack_pos--;
2252 break;
2253 case OPCODE_BGNLOOP:
2254 loop_stack[loop_stack_pos] = i;
2255 loop_stack_pos++;
2256 break;
2257 case OPCODE_ENDLOOP:
2258 loop_stack_pos--;
2259 /* Rewrite any breaks/conts at this nesting level (haven't
2260 * already had a BranchTarget assigned) to point to the end
2261 * of the loop.
2262 */
2263 for (j = loop_stack[loop_stack_pos]; j < i; j++) {
2264 if (mesa_instructions[j].Opcode == OPCODE_BRK ||
2265 mesa_instructions[j].Opcode == OPCODE_CONT) {
2266 if (mesa_instructions[j].BranchTarget == -1) {
2267 mesa_instructions[j].BranchTarget = i;
2268 }
2269 }
2270 }
2271 /* The loop ends point at each other. */
2272 mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos];
2273 mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i;
2274 break;
2275 case OPCODE_CAL:
2276 foreach_iter(exec_list_iterator, iter, v->function_signatures) {
2277 function_entry *entry = (function_entry *)iter.get();
2278
2279 if (entry->sig_id == mesa_instructions[i].BranchTarget) {
2280 mesa_instructions[i].BranchTarget = entry->inst;
2281 break;
2282 }
2283 }
2284 break;
2285 default:
2286 break;
2287 }
2288 }
2289
2290 free(if_stack);
2291 }
2292
2293 static void
2294 print_program(struct prog_instruction *mesa_instructions,
2295 ir_instruction **mesa_instruction_annotation,
2296 int num_instructions)
2297 {
2298 ir_instruction *last_ir = NULL;
2299 int i;
2300 int indent = 0;
2301
2302 for (i = 0; i < num_instructions; i++) {
2303 struct prog_instruction *mesa_inst = mesa_instructions + i;
2304 ir_instruction *ir = mesa_instruction_annotation[i];
2305
2306 fprintf(stdout, "%3d: ", i);
2307
2308 if (last_ir != ir && ir) {
2309 int j;
2310
2311 for (j = 0; j < indent; j++) {
2312 fprintf(stdout, " ");
2313 }
2314 ir->print();
2315 printf("\n");
2316 last_ir = ir;
2317
2318 fprintf(stdout, " "); /* line number spacing. */
2319 }
2320
2321 indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent,
2322 PROG_PRINT_DEBUG, NULL);
2323 }
2324 }
2325
2326 static void
2327 count_resources(struct gl_program *prog)
2328 {
2329 unsigned int i;
2330
2331 prog->SamplersUsed = 0;
2332
2333 for (i = 0; i < prog->NumInstructions; i++) {
2334 struct prog_instruction *inst = &prog->Instructions[i];
2335
2336 /* Instead of just using the uniform's value to map to a
2337 * sampler, Mesa first allocates a separate number for the
2338 * sampler (_mesa_add_sampler), then we reindex it down to a
2339 * small integer (sampler_map[], SamplersUsed), then that gets
2340 * mapped to the uniform's value, and we get an actual sampler.
2341 */
2342 if (_mesa_is_tex_instruction(inst->Opcode)) {
2343 prog->SamplerTargets[inst->TexSrcUnit] =
2344 (gl_texture_index)inst->TexSrcTarget;
2345 prog->SamplersUsed |= 1 << inst->TexSrcUnit;
2346 if (inst->TexShadow) {
2347 prog->ShadowSamplers |= 1 << inst->TexSrcUnit;
2348 }
2349 }
2350 }
2351
2352 _mesa_update_shader_textures_used(prog);
2353 }
2354
2355 /* Each stage has some uniforms in its Parameters list. The Uniforms
2356 * list for the linked shader program has a pointer to these uniforms
2357 * in each of the stage's Parameters list, so that their values can be
2358 * updated when a uniform is set.
2359 */
2360 static void
2361 link_uniforms_to_shared_uniform_list(struct gl_uniform_list *uniforms,
2362 struct gl_program *prog)
2363 {
2364 unsigned int i;
2365
2366 for (i = 0; i < prog->Parameters->NumParameters; i++) {
2367 const struct gl_program_parameter *p = prog->Parameters->Parameters + i;
2368
2369 if (p->Type == PROGRAM_UNIFORM || p->Type == PROGRAM_SAMPLER) {
2370 struct gl_uniform *uniform =
2371 _mesa_append_uniform(uniforms, p->Name, prog->Target, i);
2372 if (uniform)
2373 uniform->Initialized = p->Initialized;
2374 }
2375 }
2376 }
2377
2378 struct gl_program *
2379 get_mesa_program(GLcontext *ctx, struct gl_shader_program *shader_program,
2380 struct gl_shader *shader)
2381 {
2382 void *mem_ctx = shader_program;
2383 ir_to_mesa_visitor v;
2384 struct prog_instruction *mesa_instructions, *mesa_inst;
2385 ir_instruction **mesa_instruction_annotation;
2386 int i;
2387 struct gl_program *prog;
2388 GLenum target;
2389 const char *target_string;
2390 GLboolean progress;
2391
2392 switch (shader->Type) {
2393 case GL_VERTEX_SHADER:
2394 target = GL_VERTEX_PROGRAM_ARB;
2395 target_string = "vertex";
2396 break;
2397 case GL_FRAGMENT_SHADER:
2398 target = GL_FRAGMENT_PROGRAM_ARB;
2399 target_string = "fragment";
2400 break;
2401 default:
2402 assert(!"should not be reached");
2403 break;
2404 }
2405
2406 validate_ir_tree(shader->ir);
2407
2408 prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
2409 if (!prog)
2410 return NULL;
2411 prog->Parameters = _mesa_new_parameter_list();
2412 prog->Varying = _mesa_new_parameter_list();
2413 prog->Attributes = _mesa_new_parameter_list();
2414 v.ctx = ctx;
2415 v.prog = prog;
2416
2417 v.mem_ctx = talloc_new(NULL);
2418
2419 /* Emit Mesa IR for main(). */
2420 visit_exec_list(shader->ir, &v);
2421 v.ir_to_mesa_emit_op0(NULL, OPCODE_END);
2422
2423 /* Now emit bodies for any functions that were used. */
2424 do {
2425 progress = GL_FALSE;
2426
2427 foreach_iter(exec_list_iterator, iter, v.function_signatures) {
2428 function_entry *entry = (function_entry *)iter.get();
2429
2430 if (!entry->bgn_inst) {
2431 v.current_function = entry;
2432
2433 entry->bgn_inst = v.ir_to_mesa_emit_op0(NULL, OPCODE_BGNSUB);
2434 entry->bgn_inst->function = entry;
2435
2436 visit_exec_list(&entry->sig->body, &v);
2437
2438 ir_to_mesa_instruction *last;
2439 last = (ir_to_mesa_instruction *)v.instructions.get_tail();
2440 if (last->op != OPCODE_RET)
2441 v.ir_to_mesa_emit_op0(NULL, OPCODE_RET);
2442
2443 ir_to_mesa_instruction *end;
2444 end = v.ir_to_mesa_emit_op0(NULL, OPCODE_ENDSUB);
2445 end->function = entry;
2446
2447 progress = GL_TRUE;
2448 }
2449 }
2450 } while (progress);
2451
2452 prog->NumTemporaries = v.next_temp;
2453
2454 int num_instructions = 0;
2455 foreach_iter(exec_list_iterator, iter, v.instructions) {
2456 num_instructions++;
2457 }
2458
2459 mesa_instructions =
2460 (struct prog_instruction *)calloc(num_instructions,
2461 sizeof(*mesa_instructions));
2462 mesa_instruction_annotation = talloc_array(mem_ctx, ir_instruction *,
2463 num_instructions);
2464
2465 mesa_inst = mesa_instructions;
2466 i = 0;
2467 foreach_iter(exec_list_iterator, iter, v.instructions) {
2468 ir_to_mesa_instruction *inst = (ir_to_mesa_instruction *)iter.get();
2469
2470 mesa_inst->Opcode = inst->op;
2471 mesa_inst->CondUpdate = inst->cond_update;
2472 mesa_inst->DstReg.File = inst->dst_reg.file;
2473 mesa_inst->DstReg.Index = inst->dst_reg.index;
2474 mesa_inst->DstReg.CondMask = inst->dst_reg.cond_mask;
2475 mesa_inst->DstReg.WriteMask = inst->dst_reg.writemask;
2476 mesa_inst->DstReg.RelAddr = inst->dst_reg.reladdr != NULL;
2477 mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src_reg[0]);
2478 mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src_reg[1]);
2479 mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src_reg[2]);
2480 mesa_inst->TexSrcUnit = inst->sampler;
2481 mesa_inst->TexSrcTarget = inst->tex_target;
2482 mesa_inst->TexShadow = inst->tex_shadow;
2483 mesa_instruction_annotation[i] = inst->ir;
2484
2485 if (ctx->Shader.EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) {
2486 shader_program->InfoLog =
2487 talloc_asprintf_append(shader_program->InfoLog,
2488 "Couldn't flatten if statement\n");
2489 shader_program->LinkStatus = false;
2490 }
2491
2492 switch (mesa_inst->Opcode) {
2493 case OPCODE_BGNSUB:
2494 inst->function->inst = i;
2495 mesa_inst->Comment = strdup(inst->function->sig->function_name());
2496 break;
2497 case OPCODE_ENDSUB:
2498 mesa_inst->Comment = strdup(inst->function->sig->function_name());
2499 break;
2500 case OPCODE_CAL:
2501 mesa_inst->BranchTarget = inst->function->sig_id; /* rewritten later */
2502 break;
2503 case OPCODE_ARL:
2504 prog->NumAddressRegs = 1;
2505 break;
2506 default:
2507 break;
2508 }
2509
2510 mesa_inst++;
2511 i++;
2512 }
2513
2514 set_branchtargets(&v, mesa_instructions, num_instructions);
2515
2516 if (ctx->Shader.Flags & GLSL_DUMP) {
2517 printf("\n");
2518 printf("GLSL IR for linked %s program %d:\n", target_string,
2519 shader_program->Name);
2520 _mesa_print_ir(shader->ir, NULL);
2521 printf("\n");
2522 printf("\n");
2523 printf("Mesa IR for linked %s program %d:\n", target_string,
2524 shader_program->Name);
2525 print_program(mesa_instructions, mesa_instruction_annotation,
2526 num_instructions);
2527 }
2528
2529 prog->Instructions = mesa_instructions;
2530 prog->NumInstructions = num_instructions;
2531
2532 do_set_program_inouts(shader->ir, prog);
2533 count_resources(prog);
2534
2535 _mesa_reference_program(ctx, &shader->Program, prog);
2536
2537 if ((ctx->Shader.Flags & GLSL_NO_OPT) == 0) {
2538 _mesa_optimize_program(ctx, prog);
2539 }
2540
2541 return prog;
2542 }
2543
2544 extern "C" {
2545
2546 void
2547 _mesa_glsl_compile_shader(GLcontext *ctx, struct gl_shader *shader)
2548 {
2549 struct _mesa_glsl_parse_state *state =
2550 new(shader) _mesa_glsl_parse_state(ctx, shader->Type, shader);
2551
2552 const char *source = shader->Source;
2553 state->error = preprocess(state, &source, &state->info_log,
2554 &ctx->Extensions);
2555
2556 if (!state->error) {
2557 _mesa_glsl_lexer_ctor(state, source);
2558 _mesa_glsl_parse(state);
2559 _mesa_glsl_lexer_dtor(state);
2560 }
2561
2562 shader->ir = new(shader) exec_list;
2563 if (!state->error && !state->translation_unit.is_empty())
2564 _mesa_ast_to_hir(shader->ir, state);
2565
2566 if (!state->error && !shader->ir->is_empty()) {
2567 validate_ir_tree(shader->ir);
2568
2569 /* Lowering */
2570 do_mat_op_to_vec(shader->ir);
2571 do_mod_to_fract(shader->ir);
2572 do_div_to_mul_rcp(shader->ir);
2573
2574 /* Optimization passes */
2575 bool progress;
2576 do {
2577 progress = false;
2578
2579 progress = do_if_simplification(shader->ir) || progress;
2580 progress = do_copy_propagation(shader->ir) || progress;
2581 progress = do_dead_code_local(shader->ir) || progress;
2582 progress = do_dead_code_unlinked(shader->ir) || progress;
2583 progress = do_tree_grafting(shader->ir) || progress;
2584 progress = do_constant_propagation(shader->ir) || progress;
2585 progress = do_constant_variable_unlinked(shader->ir) || progress;
2586 progress = do_constant_folding(shader->ir) || progress;
2587 progress = do_algebraic(shader->ir) || progress;
2588 progress = do_if_return(shader->ir) || progress;
2589 if (ctx->Shader.EmitNoIfs)
2590 progress = do_if_to_cond_assign(shader->ir) || progress;
2591
2592 progress = do_vec_index_to_swizzle(shader->ir) || progress;
2593 /* Do this one after the previous to let the easier pass handle
2594 * constant vector indexing.
2595 */
2596 progress = do_vec_index_to_cond_assign(shader->ir) || progress;
2597
2598 progress = do_swizzle_swizzle(shader->ir) || progress;
2599 } while (progress);
2600
2601 validate_ir_tree(shader->ir);
2602 }
2603
2604 shader->symbols = state->symbols;
2605
2606 shader->CompileStatus = !state->error;
2607 shader->InfoLog = state->info_log;
2608 shader->Version = state->language_version;
2609 memcpy(shader->builtins_to_link, state->builtins_to_link,
2610 sizeof(shader->builtins_to_link[0]) * state->num_builtins_to_link);
2611 shader->num_builtins_to_link = state->num_builtins_to_link;
2612
2613 if (ctx->Shader.Flags & GLSL_LOG) {
2614 _mesa_write_shader_to_file(shader);
2615 }
2616
2617 if (ctx->Shader.Flags & GLSL_DUMP) {
2618 printf("GLSL source for shader %d:\n", shader->Name);
2619 printf("%s\n", shader->Source);
2620
2621 if (shader->CompileStatus) {
2622 printf("GLSL IR for shader %d:\n", shader->Name);
2623 _mesa_print_ir(shader->ir, NULL);
2624 printf("\n\n");
2625 }
2626 }
2627
2628 /* Retain any live IR, but trash the rest. */
2629 reparent_ir(shader->ir, shader);
2630
2631 talloc_free(state);
2632 }
2633
2634 void
2635 _mesa_glsl_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
2636 {
2637 unsigned int i;
2638
2639 _mesa_clear_shader_program_data(ctx, prog);
2640
2641 prog->LinkStatus = GL_TRUE;
2642
2643 for (i = 0; i < prog->NumShaders; i++) {
2644 if (!prog->Shaders[i]->CompileStatus) {
2645 prog->InfoLog =
2646 talloc_asprintf_append(prog->InfoLog,
2647 "linking with uncompiled shader");
2648 prog->LinkStatus = GL_FALSE;
2649 }
2650 }
2651
2652 prog->Varying = _mesa_new_parameter_list();
2653 _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL);
2654 _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL);
2655
2656 if (prog->LinkStatus) {
2657 link_shaders(prog);
2658
2659 /* We don't use the linker's uniforms list, and cook up our own at
2660 * generate time.
2661 */
2662 free(prog->Uniforms);
2663 prog->Uniforms = _mesa_new_uniform_list();
2664 }
2665
2666 if (prog->LinkStatus) {
2667 for (i = 0; i < prog->_NumLinkedShaders; i++) {
2668 struct gl_program *linked_prog;
2669 bool ok = true;
2670
2671 linked_prog = get_mesa_program(ctx, prog,
2672 prog->_LinkedShaders[i]);
2673
2674 link_uniforms_to_shared_uniform_list(prog->Uniforms, linked_prog);
2675
2676 switch (prog->_LinkedShaders[i]->Type) {
2677 case GL_VERTEX_SHADER:
2678 _mesa_reference_vertprog(ctx, &prog->VertexProgram,
2679 (struct gl_vertex_program *)linked_prog);
2680 ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB,
2681 linked_prog);
2682 break;
2683 case GL_FRAGMENT_SHADER:
2684 _mesa_reference_fragprog(ctx, &prog->FragmentProgram,
2685 (struct gl_fragment_program *)linked_prog);
2686 ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB,
2687 linked_prog);
2688 break;
2689 }
2690 if (!ok) {
2691 prog->LinkStatus = GL_FALSE;
2692 }
2693 }
2694 }
2695 }
2696
2697 } /* extern "C" */