Merge remote branch 'origin/master' into glsl2
[mesa.git] / src / mesa / program / ir_to_mesa.cpp
1 /*
2 * Copyright (C) 2005-2007 Brian Paul All Rights Reserved.
3 * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
4 * Copyright © 2010 Intel Corporation
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
24 */
25
26 /**
27 * \file ir_to_mesa.cpp
28 *
29  * Translates the GLSL IR into Mesa's gl_program representation
30  * (prog_instruction Mesa IR).
31 */
32
33 #include <stdio.h>
34 #include "ir.h"
35 #include "ir_visitor.h"
36 #include "ir_print_visitor.h"
37 #include "ir_expression_flattening.h"
38 #include "glsl_types.h"
39 #include "glsl_parser_extras.h"
40 #include "../glsl/program.h"
41 #include "ir_optimization.h"
42 #include "ast.h"
43
44 extern "C" {
45 #include "main/mtypes.h"
46 #include "main/shaderobj.h"
47 #include "main/uniforms.h"
48 #include "program/prog_instruction.h"
49 #include "program/prog_optimize.h"
50 #include "program/prog_print.h"
51 #include "program/program.h"
52 #include "program/prog_uniform.h"
53 #include "program/prog_parameter.h"
54 }
55
56 /**
57 * This struct is a corresponding struct to Mesa prog_src_register, with
58 * wider fields.
59 */
60 typedef struct ir_to_mesa_src_reg {
61 int file; /**< PROGRAM_* from Mesa */
62 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
63 GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
64 int negate; /**< NEGATE_XYZW mask from mesa */
65 /** Register index should be offset by the integer in this reg. */
66 ir_to_mesa_src_reg *reladdr;
67 } ir_to_mesa_src_reg;
68
69 typedef struct ir_to_mesa_dst_reg {
70 int file; /**< PROGRAM_* from Mesa */
71 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
72 int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
73 GLuint cond_mask:4;
74 /** Register index should be offset by the integer in this reg. */
75 ir_to_mesa_src_reg *reladdr;
76 } ir_to_mesa_dst_reg;
77
78 extern ir_to_mesa_src_reg ir_to_mesa_undef;
79
80 class ir_to_mesa_instruction : public exec_node {
81 public:
82 enum prog_opcode op;
83 ir_to_mesa_dst_reg dst_reg;
84 ir_to_mesa_src_reg src_reg[3];
85 /** Pointer to the ir source this tree came from for debugging */
86 ir_instruction *ir;
87 GLboolean cond_update;
88 int sampler; /**< sampler index */
89 int tex_target; /**< One of TEXTURE_*_INDEX */
90 GLboolean tex_shadow;
91
92 class function_entry *function; /* Set on OPCODE_CAL or OPCODE_BGNSUB */
93 };
94
95 class variable_storage : public exec_node {
96 public:
97 variable_storage(ir_variable *var, int file, int index)
98 : file(file), index(index), var(var)
99 {
100 /* empty */
101 }
102
103 int file;
104 int index;
105 ir_variable *var; /* variable that maps to this, if any */
106 };
107
108 class function_entry : public exec_node {
109 public:
110 ir_function_signature *sig;
111
112 /**
113 * identifier of this function signature used by the program.
114 *
115 * At the point that Mesa instructions for function calls are
116 * generated, we don't know the address of the first instruction of
117  * the function body.  So we emit the call with this small integer as its
118  * BranchTarget and rewrite it to the real address during set_branchtargets().
119 */
120 int sig_id;
121
122 /**
123 * Pointer to first instruction of the function body.
124 *
125 * Set during function body emits after main() is processed.
126 */
127 ir_to_mesa_instruction *bgn_inst;
128
129 /**
130 * Index of the first instruction of the function body in actual
131 * Mesa IR.
132 *
133  * Set after conversion from ir_to_mesa_instruction to prog_instruction.
134 */
135 int inst;
136
137 /** Storage for the return value. */
138 ir_to_mesa_src_reg return_reg;
139 };
140
141 class ir_to_mesa_visitor : public ir_visitor {
142 public:
143 ir_to_mesa_visitor();
144
145 function_entry *current_function;
146
147 GLcontext *ctx;
148 struct gl_program *prog;
149
150 int next_temp;
151
152 variable_storage *find_variable_storage(ir_variable *var);
153
154 function_entry *get_function_signature(ir_function_signature *sig);
155
156 ir_to_mesa_src_reg get_temp(const glsl_type *type);
157 void reladdr_to_temp(ir_instruction *ir,
158 ir_to_mesa_src_reg *reg, int *num_reladdr);
159
160 struct ir_to_mesa_src_reg src_reg_for_float(float val);
161
162 /**
163 * \name Visit methods
164 *
165 * As typical for the visitor pattern, there must be one \c visit method for
166 * each concrete subclass of \c ir_instruction. Virtual base classes within
167 * the hierarchy should not have \c visit methods.
168 */
169 /*@{*/
170 virtual void visit(ir_variable *);
171 virtual void visit(ir_loop *);
172 virtual void visit(ir_loop_jump *);
173 virtual void visit(ir_function_signature *);
174 virtual void visit(ir_function *);
175 virtual void visit(ir_expression *);
176 virtual void visit(ir_swizzle *);
177 virtual void visit(ir_dereference_variable *);
178 virtual void visit(ir_dereference_array *);
179 virtual void visit(ir_dereference_record *);
180 virtual void visit(ir_assignment *);
181 virtual void visit(ir_constant *);
182 virtual void visit(ir_call *);
183 virtual void visit(ir_return *);
184 virtual void visit(ir_discard *);
185 virtual void visit(ir_texture *);
186 virtual void visit(ir_if *);
187 /*@}*/
188
189 struct ir_to_mesa_src_reg result;
190
191 /** List of variable_storage */
192 exec_list variables;
193
194 /** List of function_entry */
195 exec_list function_signatures;
196 int next_signature_id;
197
198 /** List of ir_to_mesa_instruction */
199 exec_list instructions;
200
201 ir_to_mesa_instruction *ir_to_mesa_emit_op0(ir_instruction *ir,
202 enum prog_opcode op);
203
204 ir_to_mesa_instruction *ir_to_mesa_emit_op1(ir_instruction *ir,
205 enum prog_opcode op,
206 ir_to_mesa_dst_reg dst,
207 ir_to_mesa_src_reg src0);
208
209 ir_to_mesa_instruction *ir_to_mesa_emit_op2(ir_instruction *ir,
210 enum prog_opcode op,
211 ir_to_mesa_dst_reg dst,
212 ir_to_mesa_src_reg src0,
213 ir_to_mesa_src_reg src1);
214
215 ir_to_mesa_instruction *ir_to_mesa_emit_op3(ir_instruction *ir,
216 enum prog_opcode op,
217 ir_to_mesa_dst_reg dst,
218 ir_to_mesa_src_reg src0,
219 ir_to_mesa_src_reg src1,
220 ir_to_mesa_src_reg src2);
221
222 void ir_to_mesa_emit_scalar_op1(ir_instruction *ir,
223 enum prog_opcode op,
224 ir_to_mesa_dst_reg dst,
225 ir_to_mesa_src_reg src0);
226
227 void ir_to_mesa_emit_scalar_op2(ir_instruction *ir,
228 enum prog_opcode op,
229 ir_to_mesa_dst_reg dst,
230 ir_to_mesa_src_reg src0,
231 ir_to_mesa_src_reg src1);
232
233 GLboolean try_emit_mad(ir_expression *ir,
234 int mul_operand);
235
236 int *sampler_map;
237 int sampler_map_size;
238
239 void map_sampler(int location, int sampler);
240 int get_sampler_number(int location);
241
242 void *mem_ctx;
243 };
244
245 ir_to_mesa_src_reg ir_to_mesa_undef = {
246 PROGRAM_UNDEFINED, 0, SWIZZLE_NOOP, NEGATE_NONE, NULL,
247 };
248
249 ir_to_mesa_dst_reg ir_to_mesa_undef_dst = {
250 PROGRAM_UNDEFINED, 0, SWIZZLE_NOOP, COND_TR, NULL,
251 };
252
253 ir_to_mesa_dst_reg ir_to_mesa_address_reg = {
254 PROGRAM_ADDRESS, 0, WRITEMASK_X, COND_TR, NULL
255 };
256
257 static int swizzle_for_size(int size)
258 {
259 int size_swizzles[4] = {
260 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
261 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
262 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
263 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
264 };
265
266 return size_swizzles[size - 1];
267 }
268
269 ir_to_mesa_instruction *
270 ir_to_mesa_visitor::ir_to_mesa_emit_op3(ir_instruction *ir,
271 enum prog_opcode op,
272 ir_to_mesa_dst_reg dst,
273 ir_to_mesa_src_reg src0,
274 ir_to_mesa_src_reg src1,
275 ir_to_mesa_src_reg src2)
276 {
277 ir_to_mesa_instruction *inst = new(mem_ctx) ir_to_mesa_instruction();
278 int num_reladdr = 0;
279
280 /* If we have to do relative addressing, we want to load the ARL
281 * reg directly for one of the regs, and preload the other reladdr
282 * sources into temps.
283 */
284 num_reladdr += dst.reladdr != NULL;
285 num_reladdr += src0.reladdr != NULL;
286 num_reladdr += src1.reladdr != NULL;
287 num_reladdr += src2.reladdr != NULL;
288
289 reladdr_to_temp(ir, &src2, &num_reladdr);
290 reladdr_to_temp(ir, &src1, &num_reladdr);
291 reladdr_to_temp(ir, &src0, &num_reladdr);
292
293 if (dst.reladdr) {
294 ir_to_mesa_emit_op1(ir, OPCODE_ARL, ir_to_mesa_address_reg,
295 *dst.reladdr);
296
297 num_reladdr--;
298 }
299 assert(num_reladdr == 0);
300
301 inst->op = op;
302 inst->dst_reg = dst;
303 inst->src_reg[0] = src0;
304 inst->src_reg[1] = src1;
305 inst->src_reg[2] = src2;
306 inst->ir = ir;
307
308 inst->function = NULL;
309
310 this->instructions.push_tail(inst);
311
312 return inst;
313 }
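/* For example, a copy between two variably-indexed array elements
 * (hypothetical "dst[i] = src[j]") can only use the single address register
 * for one operand, so the source side is resolved through a temporary first,
 * roughly:
 *
 *    ARL ADDR.x, j
 *    MOV TEMP[t], src[ADDR.x]     (reladdr source preloaded into a temp)
 *    ARL ADDR.x, i
 *    MOV dst[ADDR.x], TEMP[t]     (the destination keeps ADDR directly)
 */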
314
315
316 ir_to_mesa_instruction *
317 ir_to_mesa_visitor::ir_to_mesa_emit_op2(ir_instruction *ir,
318 enum prog_opcode op,
319 ir_to_mesa_dst_reg dst,
320 ir_to_mesa_src_reg src0,
321 ir_to_mesa_src_reg src1)
322 {
323 return ir_to_mesa_emit_op3(ir, op, dst, src0, src1, ir_to_mesa_undef);
324 }
325
326 ir_to_mesa_instruction *
327 ir_to_mesa_visitor::ir_to_mesa_emit_op1(ir_instruction *ir,
328 enum prog_opcode op,
329 ir_to_mesa_dst_reg dst,
330 ir_to_mesa_src_reg src0)
331 {
332 return ir_to_mesa_emit_op3(ir, op, dst,
333 src0, ir_to_mesa_undef, ir_to_mesa_undef);
334 }
335
336 ir_to_mesa_instruction *
337 ir_to_mesa_visitor::ir_to_mesa_emit_op0(ir_instruction *ir,
338 enum prog_opcode op)
339 {
340 return ir_to_mesa_emit_op3(ir, op, ir_to_mesa_undef_dst,
341 ir_to_mesa_undef,
342 ir_to_mesa_undef,
343 ir_to_mesa_undef);
344 }
345
346 void
347 ir_to_mesa_visitor::map_sampler(int location, int sampler)
348 {
349 if (this->sampler_map_size <= location) {
350 this->sampler_map = talloc_realloc(this->mem_ctx, this->sampler_map,
351 int, location + 1);
352 this->sampler_map_size = location + 1;
353 }
354
355 this->sampler_map[location] = sampler;
356 }
357
358 int
359 ir_to_mesa_visitor::get_sampler_number(int location)
360 {
361 assert(location < this->sampler_map_size);
362 return this->sampler_map[location];
363 }
364
365 inline ir_to_mesa_dst_reg
366 ir_to_mesa_dst_reg_from_src(ir_to_mesa_src_reg reg)
367 {
368 ir_to_mesa_dst_reg dst_reg;
369
370 dst_reg.file = reg.file;
371 dst_reg.index = reg.index;
372 dst_reg.writemask = WRITEMASK_XYZW;
373 dst_reg.cond_mask = COND_TR;
374 dst_reg.reladdr = reg.reladdr;
375
376 return dst_reg;
377 }
378
379 inline ir_to_mesa_src_reg
380 ir_to_mesa_src_reg_from_dst(ir_to_mesa_dst_reg reg)
381 {
382 ir_to_mesa_src_reg src_reg;
383
384 src_reg.file = reg.file;
385 src_reg.index = reg.index;
386 src_reg.swizzle = SWIZZLE_XYZW;
387 src_reg.negate = 0;
388 src_reg.reladdr = reg.reladdr;
389
390 return src_reg;
391 }
392
393 /**
394 * Emits Mesa scalar opcodes to produce unique answers across channels.
395 *
396  * Some Mesa opcodes are scalar-only, as in ARB_fp/vp: the source's X
397  * channel determines the result written to every destination channel.
398  * So to apply such an operation to a vec4, we emit one scalar instruction
399  * per distinct source channel needed to produce the destination channels.
400 */
401 void
402 ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op2(ir_instruction *ir,
403 enum prog_opcode op,
404 ir_to_mesa_dst_reg dst,
405 ir_to_mesa_src_reg orig_src0,
406 ir_to_mesa_src_reg orig_src1)
407 {
408 int i, j;
409 int done_mask = ~dst.writemask;
410
411 /* Mesa RCP is a scalar operation splatting results to all channels,
412 * like ARB_fp/vp. So emit as many RCPs as necessary to cover our
413 * dst channels.
414 */
415 for (i = 0; i < 4; i++) {
416 GLuint this_mask = (1 << i);
417 ir_to_mesa_instruction *inst;
418 ir_to_mesa_src_reg src0 = orig_src0;
419 ir_to_mesa_src_reg src1 = orig_src1;
420
421 if (done_mask & this_mask)
422 continue;
423
424 GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
425 GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
426 for (j = i + 1; j < 4; j++) {
427 if (!(done_mask & (1 << j)) &&
428 GET_SWZ(src0.swizzle, j) == src0_swiz &&
429 GET_SWZ(src1.swizzle, j) == src1_swiz) {
430 this_mask |= (1 << j);
431 }
432 }
433 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
434 src0_swiz, src0_swiz);
435 src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
436 src1_swiz, src1_swiz);
437
438 inst = ir_to_mesa_emit_op2(ir, op,
439 dst,
440 src0,
441 src1);
442 inst->dst_reg.writemask = this_mask;
443 done_mask |= this_mask;
444 }
445 }
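/* For example, an RCP with dst.writemask = XY and src0 swizzled .xyyy
 * expands to roughly
 *
 *    RCP dst.x, src0.xxxx
 *    RCP dst.y, src0.yyyy
 *
 * while with src0 swizzled .yyyy a single RCP with writemask XY suffices,
 * because the j loop above merges destination channels that read the same
 * source channel.
 */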
446
447 void
448 ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op1(ir_instruction *ir,
449 enum prog_opcode op,
450 ir_to_mesa_dst_reg dst,
451 ir_to_mesa_src_reg src0)
452 {
453 ir_to_mesa_src_reg undef = ir_to_mesa_undef;
454
455 undef.swizzle = SWIZZLE_XXXX;
456
457 ir_to_mesa_emit_scalar_op2(ir, op, dst, src0, undef);
458 }
459
460 struct ir_to_mesa_src_reg
461 ir_to_mesa_visitor::src_reg_for_float(float val)
462 {
463 ir_to_mesa_src_reg src_reg;
464
465 src_reg.file = PROGRAM_CONSTANT;
466 src_reg.index = _mesa_add_unnamed_constant(this->prog->Parameters,
467 &val, 1, &src_reg.swizzle);
468 src_reg.reladdr = NULL;
469 src_reg.negate = 0;
470
471 return src_reg;
472 }
473
474 static int
475 type_size(const struct glsl_type *type)
476 {
477 unsigned int i;
478 int size;
479
480 switch (type->base_type) {
481 case GLSL_TYPE_UINT:
482 case GLSL_TYPE_INT:
483 case GLSL_TYPE_FLOAT:
484 case GLSL_TYPE_BOOL:
485 if (type->is_matrix()) {
486 return type->matrix_columns;
487 } else {
488 /* Regardless of size of vector, it gets a vec4. This is bad
489 * packing for things like floats, but otherwise arrays become a
490 * mess. Hopefully a later pass over the code can pack scalars
491 * down if appropriate.
492 */
493 return 1;
494 }
495 case GLSL_TYPE_ARRAY:
496 return type_size(type->fields.array) * type->length;
497 case GLSL_TYPE_STRUCT:
498 size = 0;
499 for (i = 0; i < type->length; i++) {
500 size += type_size(type->fields.structure[i].type);
501 }
502 return size;
503 default:
504 assert(0);
return 0; /* unreachable; keeps the non-void return path well-defined */
505 }
506 }
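/* Worked examples of the sizing above: float, bool and vec3 each take one
 * vec4 slot; mat4 takes 4 (one per column); vec2[10] takes 10; and
 * struct { vec3 a; mat3 b; } takes 1 + 3 = 4 slots.
 */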
507
508 /**
509 * In the initial pass of codegen, we assign temporary numbers to
510 * intermediate results. (not SSA -- variable assignments will reuse
511 * storage). Actual register allocation for the Mesa VM occurs in a
512 * pass over the Mesa IR later.
513 */
514 ir_to_mesa_src_reg
515 ir_to_mesa_visitor::get_temp(const glsl_type *type)
516 {
517 ir_to_mesa_src_reg src_reg;
518 int swizzle[4];
519 int i;
520
521 assert(!type->is_array());
522
523 src_reg.file = PROGRAM_TEMPORARY;
524 src_reg.index = next_temp;
525 src_reg.reladdr = NULL;
526 next_temp += type_size(type);
527
528 for (i = 0; i < type->vector_elements; i++)
529 swizzle[i] = i;
530 for (; i < 4; i++)
531 swizzle[i] = type->vector_elements - 1;
532 src_reg.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1],
533 swizzle[2], swizzle[3]);
534 src_reg.negate = 0;
535
536 return src_reg;
537 }
538
539 variable_storage *
540 ir_to_mesa_visitor::find_variable_storage(ir_variable *var)
541 {
542
543 variable_storage *entry;
544
545 foreach_iter(exec_list_iterator, iter, this->variables) {
546 entry = (variable_storage *)iter.get();
547
548 if (entry->var == var)
549 return entry;
550 }
551
552 return NULL;
553 }
554
555 void
556 ir_to_mesa_visitor::visit(ir_variable *ir)
557 {
558 (void)ir;
559 }
560
561 void
562 ir_to_mesa_visitor::visit(ir_loop *ir)
563 {
564 assert(!ir->from);
565 assert(!ir->to);
566 assert(!ir->increment);
567 assert(!ir->counter);
568
569 ir_to_mesa_emit_op0(NULL, OPCODE_BGNLOOP);
570 visit_exec_list(&ir->body_instructions, this);
571 ir_to_mesa_emit_op0(NULL, OPCODE_ENDLOOP);
572 }
573
574 void
575 ir_to_mesa_visitor::visit(ir_loop_jump *ir)
576 {
577 switch (ir->mode) {
578 case ir_loop_jump::jump_break:
579 ir_to_mesa_emit_op0(NULL, OPCODE_BRK);
580 break;
581 case ir_loop_jump::jump_continue:
582 ir_to_mesa_emit_op0(NULL, OPCODE_CONT);
583 break;
584 }
585 }
586
587
588 void
589 ir_to_mesa_visitor::visit(ir_function_signature *ir)
590 {
591 assert(0);
592 (void)ir;
593 }
594
595 void
596 ir_to_mesa_visitor::visit(ir_function *ir)
597 {
598 /* Ignore function bodies other than main() -- we shouldn't see calls to
599 * them since they should all be inlined before we get to ir_to_mesa.
600 */
601 if (strcmp(ir->name, "main") == 0) {
602 const ir_function_signature *sig;
603 exec_list empty;
604
605 sig = ir->matching_signature(&empty);
606
607 assert(sig);
608
609 foreach_iter(exec_list_iterator, iter, sig->body) {
610 ir_instruction *ir = (ir_instruction *)iter.get();
611
612 ir->accept(this);
613 }
614 }
615 }
616
617 GLboolean
618 ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
619 {
620 int nonmul_operand = 1 - mul_operand;
621 ir_to_mesa_src_reg a, b, c;
622
623 ir_expression *expr = ir->operands[mul_operand]->as_expression();
624 if (!expr || expr->operation != ir_binop_mul)
625 return false;
626
627 expr->operands[0]->accept(this);
628 a = this->result;
629 expr->operands[1]->accept(this);
630 b = this->result;
631 ir->operands[nonmul_operand]->accept(this);
632 c = this->result;
633
634 this->result = get_temp(ir->type);
635 ir_to_mesa_emit_op3(ir, OPCODE_MAD,
636 ir_to_mesa_dst_reg_from_src(this->result), a, b, c);
637
638 return true;
639 }
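/* For example, for (a * b) + c the ir_binop_add case below tries each
 * operand as the multiply; operand 0 matches, so instead of
 *
 *    MUL t, a, b
 *    ADD r, t, c
 *
 * a single MAD r, a, b, c (r = a * b + c) is emitted.
 */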
640
641 void
642 ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir,
643 ir_to_mesa_src_reg *reg, int *num_reladdr)
644 {
645 if (!reg->reladdr)
646 return;
647
648 ir_to_mesa_emit_op1(ir, OPCODE_ARL, ir_to_mesa_address_reg, *reg->reladdr);
649
650 if (*num_reladdr != 1) {
651 ir_to_mesa_src_reg temp = get_temp(glsl_type::vec4_type);
652
653 ir_to_mesa_emit_op1(ir, OPCODE_MOV,
654 ir_to_mesa_dst_reg_from_src(temp), *reg);
655 *reg = temp;
656 }
657
658 (*num_reladdr)--;
659 }
660
661 void
662 ir_to_mesa_visitor::visit(ir_expression *ir)
663 {
664 unsigned int operand;
665 struct ir_to_mesa_src_reg op[2];
666 struct ir_to_mesa_src_reg result_src;
667 struct ir_to_mesa_dst_reg result_dst;
668 const glsl_type *vec4_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 4, 1);
669 const glsl_type *vec3_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 3, 1);
670 const glsl_type *vec2_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 2, 1);
671
672 /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c)
673 */
674 if (ir->operation == ir_binop_add) {
675 if (try_emit_mad(ir, 1))
676 return;
677 if (try_emit_mad(ir, 0))
678 return;
679 }
680
681 for (operand = 0; operand < ir->get_num_operands(); operand++) {
682 this->result.file = PROGRAM_UNDEFINED;
683 ir->operands[operand]->accept(this);
684 if (this->result.file == PROGRAM_UNDEFINED) {
685 ir_print_visitor v;
686 printf("Failed to get tree for expression operand:\n");
687 ir->operands[operand]->accept(&v);
688 exit(1);
689 }
690 op[operand] = this->result;
691
692 /* Matrix expression operands should have been broken down to vector
693 * operations already.
694 */
695 assert(!ir->operands[operand]->type->is_matrix());
696 }
697
698 this->result.file = PROGRAM_UNDEFINED;
699
700 /* Storage for our result. Ideally for an assignment we'd be using
701 * the actual storage for the result here, instead.
702 */
703 result_src = get_temp(ir->type);
704 /* convenience for the emit functions below. */
705 result_dst = ir_to_mesa_dst_reg_from_src(result_src);
706 /* Limit writes to the channels that will be used by result_src later.
707 * This does limit this temp's use as a temporary for multi-instruction
708 * sequences.
709 */
710 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
711
712 switch (ir->operation) {
713 case ir_unop_logic_not:
714 ir_to_mesa_emit_op2(ir, OPCODE_SEQ, result_dst,
715 op[0], src_reg_for_float(0.0));
716 break;
717 case ir_unop_neg:
718 op[0].negate = ~op[0].negate;
719 result_src = op[0];
720 break;
721 case ir_unop_abs:
722 ir_to_mesa_emit_op1(ir, OPCODE_ABS, result_dst, op[0]);
723 break;
724 case ir_unop_sign:
725 ir_to_mesa_emit_op1(ir, OPCODE_SSG, result_dst, op[0]);
726 break;
727 case ir_unop_rcp:
728 ir_to_mesa_emit_scalar_op1(ir, OPCODE_RCP, result_dst, op[0]);
729 break;
730
731 case ir_unop_exp:
732 ir_to_mesa_emit_scalar_op2(ir, OPCODE_POW, result_dst,
733 src_reg_for_float(M_E), op[0]);
734 break;
735 case ir_unop_exp2:
736 ir_to_mesa_emit_scalar_op1(ir, OPCODE_EX2, result_dst, op[0]);
737 break;
738 case ir_unop_log:
739 ir_to_mesa_emit_scalar_op1(ir, OPCODE_LOG, result_dst, op[0]);
740 break;
741 case ir_unop_log2:
742 ir_to_mesa_emit_scalar_op1(ir, OPCODE_LG2, result_dst, op[0]);
743 break;
744 case ir_unop_sin:
745 ir_to_mesa_emit_scalar_op1(ir, OPCODE_SIN, result_dst, op[0]);
746 break;
747 case ir_unop_cos:
748 ir_to_mesa_emit_scalar_op1(ir, OPCODE_COS, result_dst, op[0]);
749 break;
750
751 case ir_unop_dFdx:
752 ir_to_mesa_emit_op1(ir, OPCODE_DDX, result_dst, op[0]);
753 break;
754 case ir_unop_dFdy:
755 ir_to_mesa_emit_op1(ir, OPCODE_DDY, result_dst, op[0]);
756 break;
757
758 case ir_binop_add:
759 ir_to_mesa_emit_op2(ir, OPCODE_ADD, result_dst, op[0], op[1]);
760 break;
761 case ir_binop_sub:
762 ir_to_mesa_emit_op2(ir, OPCODE_SUB, result_dst, op[0], op[1]);
763 break;
764
765 case ir_binop_mul:
766 ir_to_mesa_emit_op2(ir, OPCODE_MUL, result_dst, op[0], op[1]);
767 break;
768 case ir_binop_div:
769 assert(!"not reached: should be handled by ir_div_to_mul_rcp");
770 case ir_binop_mod:
771 assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
772 break;
773
774 case ir_binop_less:
775 ir_to_mesa_emit_op2(ir, OPCODE_SLT, result_dst, op[0], op[1]);
776 break;
777 case ir_binop_greater:
778 ir_to_mesa_emit_op2(ir, OPCODE_SGT, result_dst, op[0], op[1]);
779 break;
780 case ir_binop_lequal:
781 ir_to_mesa_emit_op2(ir, OPCODE_SLE, result_dst, op[0], op[1]);
782 break;
783 case ir_binop_gequal:
784 ir_to_mesa_emit_op2(ir, OPCODE_SGE, result_dst, op[0], op[1]);
785 break;
786 case ir_binop_equal:
787 ir_to_mesa_emit_op2(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
788 break;
789 case ir_binop_logic_xor:
790 case ir_binop_nequal:
791 ir_to_mesa_emit_op2(ir, OPCODE_SNE, result_dst, op[0], op[1]);
792 break;
793
794 case ir_binop_logic_or:
795 /* This could be a saturated add and skip the SNE. */
796 ir_to_mesa_emit_op2(ir, OPCODE_ADD,
797 result_dst,
798 op[0], op[1]);
799
800 ir_to_mesa_emit_op2(ir, OPCODE_SNE,
801 result_dst,
802 result_src, src_reg_for_float(0.0));
803 break;
804
805 case ir_binop_logic_and:
806 /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
807 ir_to_mesa_emit_op2(ir, OPCODE_MUL,
808 result_dst,
809 op[0], op[1]);
810 break;
811
812 case ir_binop_dot:
813 if (ir->operands[0]->type == vec4_type) {
814 assert(ir->operands[1]->type == vec4_type);
815 ir_to_mesa_emit_op2(ir, OPCODE_DP4,
816 result_dst,
817 op[0], op[1]);
818 } else if (ir->operands[0]->type == vec3_type) {
819 assert(ir->operands[1]->type == vec3_type);
820 ir_to_mesa_emit_op2(ir, OPCODE_DP3,
821 result_dst,
822 op[0], op[1]);
823 } else if (ir->operands[0]->type == vec2_type) {
824 assert(ir->operands[1]->type == vec2_type);
825 ir_to_mesa_emit_op2(ir, OPCODE_DP2,
826 result_dst,
827 op[0], op[1]);
828 }
829 break;
830
831 case ir_binop_cross:
832 ir_to_mesa_emit_op2(ir, OPCODE_XPD, result_dst, op[0], op[1]);
833 break;
834
835 case ir_unop_sqrt:
836 ir_to_mesa_emit_scalar_op1(ir, OPCODE_RSQ, result_dst, op[0]);
837 ir_to_mesa_emit_scalar_op1(ir, OPCODE_RCP, result_dst, result_src);
838 /* For incoming channels < 0, set the result to 0. */
839 ir_to_mesa_emit_op3(ir, OPCODE_CMP, result_dst,
840 op[0], src_reg_for_float(0.0), result_src);
841 break;
842 case ir_unop_rsq:
843 ir_to_mesa_emit_scalar_op1(ir, OPCODE_RSQ, result_dst, op[0]);
844 break;
845 case ir_unop_i2f:
846 case ir_unop_b2f:
847 case ir_unop_b2i:
848 /* Mesa IR lacks types; ints are stored as truncated floats. */
849 result_src = op[0];
850 break;
851 case ir_unop_f2i:
852 ir_to_mesa_emit_op1(ir, OPCODE_TRUNC, result_dst, op[0]);
853 break;
854 case ir_unop_f2b:
855 case ir_unop_i2b:
856 ir_to_mesa_emit_op2(ir, OPCODE_SNE, result_dst,
857 op[0], src_reg_for_float(0.0));
858 break;
859 case ir_unop_trunc:
860 ir_to_mesa_emit_op1(ir, OPCODE_TRUNC, result_dst, op[0]);
861 break;
862 case ir_unop_ceil:
863 op[0].negate = ~op[0].negate;
864 ir_to_mesa_emit_op1(ir, OPCODE_FLR, result_dst, op[0]);
865 result_src.negate = ~result_src.negate;
866 break;
867 case ir_unop_floor:
868 ir_to_mesa_emit_op1(ir, OPCODE_FLR, result_dst, op[0]);
869 break;
870 case ir_unop_fract:
871 ir_to_mesa_emit_op1(ir, OPCODE_FRC, result_dst, op[0]);
872 break;
873
874 case ir_binop_min:
875 ir_to_mesa_emit_op2(ir, OPCODE_MIN, result_dst, op[0], op[1]);
876 break;
877 case ir_binop_max:
878 ir_to_mesa_emit_op2(ir, OPCODE_MAX, result_dst, op[0], op[1]);
879 break;
880 case ir_binop_pow:
881 ir_to_mesa_emit_scalar_op2(ir, OPCODE_POW, result_dst, op[0], op[1]);
882 break;
883
884 case ir_unop_bit_not:
885 case ir_unop_u2f:
886 case ir_binop_lshift:
887 case ir_binop_rshift:
888 case ir_binop_bit_and:
889 case ir_binop_bit_xor:
890 case ir_binop_bit_or:
891 assert(!"GLSL 1.30 features unsupported");
892 break;
893 }
894
895 this->result = result_src;
896 }
897
898
899 void
900 ir_to_mesa_visitor::visit(ir_swizzle *ir)
901 {
902 ir_to_mesa_src_reg src_reg;
903 int i;
904 int swizzle[4];
905
906 /* Note that this is only swizzles in expressions, not those on the left
907 * hand side of an assignment, which do write masking. See ir_assignment
908 * for that.
909 */
910
911 ir->val->accept(this);
912 src_reg = this->result;
913 assert(src_reg.file != PROGRAM_UNDEFINED);
914
915 for (i = 0; i < 4; i++) {
916 if (i < ir->type->vector_elements) {
917 switch (i) {
918 case 0:
919 swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.x);
920 break;
921 case 1:
922 swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.y);
923 break;
924 case 2:
925 swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.z);
926 break;
927 case 3:
928 swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.w);
929 break;
930 }
931 } else {
932 /* If the type is smaller than a vec4, replicate the last
933 * channel out.
934 */
935 swizzle[i] = swizzle[ir->type->vector_elements - 1];
936 }
937 }
938
939 src_reg.swizzle = MAKE_SWIZZLE4(swizzle[0],
940 swizzle[1],
941 swizzle[2],
942 swizzle[3]);
943
944 this->result = src_reg;
945 }
946
947 static int
948 add_matrix_ref(struct gl_program *prog, int *tokens)
949 {
950 int base_pos = -1;
951 int i;
952
953 /* Add a ref for each column. It looks like the reason we do
954 * it this way is that _mesa_add_state_reference doesn't work
955 * for things that aren't vec4s, so the tokens[2]/tokens[3]
956 * range has to be equal.
957 */
958 for (i = 0; i < 4; i++) {
959 tokens[2] = i;
960 tokens[3] = i;
961 int pos = _mesa_add_state_reference(prog->Parameters,
962 (gl_state_index *)tokens);
963 if (base_pos == -1)
964 base_pos = pos;
965 else
966 assert(base_pos + i == pos);
967 }
968
969 return base_pos;
970 }
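/* For gl_ModelViewMatrix (loaded transposed, see the table below), the four
 * state references added here would be roughly
 *
 *    { STATE_MODELVIEW_MATRIX, 0, 0, 0, STATE_MATRIX_TRANSPOSE }
 *    { STATE_MODELVIEW_MATRIX, 0, 1, 1, STATE_MATRIX_TRANSPOSE }
 *    { STATE_MODELVIEW_MATRIX, 0, 2, 2, STATE_MATRIX_TRANSPOSE }
 *    { STATE_MODELVIEW_MATRIX, 0, 3, 3, STATE_MATRIX_TRANSPOSE }
 *
 * i.e. one vec4 of matrix state per register, at consecutive parameter
 * positions starting at the returned base_pos.
 */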
971
972 static variable_storage *
973 get_builtin_matrix_ref(void *mem_ctx, struct gl_program *prog, ir_variable *var,
974 ir_rvalue *array_index)
975 {
976 /*
977 * NOTE: The ARB_vertex_program extension specified that matrices get
978 * loaded in registers in row-major order. With GLSL, we want column-
979 * major order. So, we need to transpose all matrices here...
980 */
981 static const struct {
982 const char *name;
983 int matrix;
984 int modifier;
985 } matrices[] = {
986 { "gl_ModelViewMatrix", STATE_MODELVIEW_MATRIX, STATE_MATRIX_TRANSPOSE },
987 { "gl_ModelViewMatrixInverse", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVTRANS },
988 { "gl_ModelViewMatrixTranspose", STATE_MODELVIEW_MATRIX, 0 },
989 { "gl_ModelViewMatrixInverseTranspose", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVERSE },
990
991 { "gl_ProjectionMatrix", STATE_PROJECTION_MATRIX, STATE_MATRIX_TRANSPOSE },
992 { "gl_ProjectionMatrixInverse", STATE_PROJECTION_MATRIX, STATE_MATRIX_INVTRANS },
993 { "gl_ProjectionMatrixTranspose", STATE_PROJECTION_MATRIX, 0 },
994 { "gl_ProjectionMatrixInverseTranspose", STATE_PROJECTION_MATRIX, STATE_MATRIX_INVERSE },
995
996 { "gl_ModelViewProjectionMatrix", STATE_MVP_MATRIX, STATE_MATRIX_TRANSPOSE },
997 { "gl_ModelViewProjectionMatrixInverse", STATE_MVP_MATRIX, STATE_MATRIX_INVTRANS },
998 { "gl_ModelViewProjectionMatrixTranspose", STATE_MVP_MATRIX, 0 },
999 { "gl_ModelViewProjectionMatrixInverseTranspose", STATE_MVP_MATRIX, STATE_MATRIX_INVERSE },
1000
1001 { "gl_TextureMatrix", STATE_TEXTURE_MATRIX, STATE_MATRIX_TRANSPOSE },
1002 { "gl_TextureMatrixInverse", STATE_TEXTURE_MATRIX, STATE_MATRIX_INVTRANS },
1003 { "gl_TextureMatrixTranspose", STATE_TEXTURE_MATRIX, 0 },
1004 { "gl_TextureMatrixInverseTranspose", STATE_TEXTURE_MATRIX, STATE_MATRIX_INVERSE },
1005
1006 { "gl_NormalMatrix", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVERSE },
1007
1008 };
1009 unsigned int i;
1010 variable_storage *entry;
1011
1012 /* C++ gets angry when we try to use a plain int as a gl_state_index, so we
1013 * build the tokens as ints and cast. Make sure the two types are compatible.
1014 */
1015 assert(sizeof(gl_state_index) == sizeof(int));
1016
1017 for (i = 0; i < Elements(matrices); i++) {
1018 if (strcmp(var->name, matrices[i].name) == 0) {
1019 int tokens[STATE_LENGTH];
1020 int base_pos = -1;
1021
1022 tokens[0] = matrices[i].matrix;
1023 tokens[4] = matrices[i].modifier;
1024 if (matrices[i].matrix == STATE_TEXTURE_MATRIX) {
1025 ir_constant *index = array_index->constant_expression_value();
1026 if (index) {
1027 tokens[1] = index->value.i[0];
1028 base_pos = add_matrix_ref(prog, tokens);
1029 } else {
1030 for (i = 0; i < var->type->length; i++) {
1031 tokens[1] = i;
1032 int pos = add_matrix_ref(prog, tokens);
1033 if (base_pos == -1)
1034 base_pos = pos;
1035 else
1036 assert(base_pos + (int)i * 4 == pos);
1037 }
1038 }
1039 } else {
1040 tokens[1] = 0; /* unused array index */
1041 base_pos = add_matrix_ref(prog, tokens);
1042 }
1043 tokens[4] = matrices[i].modifier;
1044
1045 entry = new(mem_ctx) variable_storage(var,
1046 PROGRAM_STATE_VAR,
1047 base_pos);
1048
1049 return entry;
1050 }
1051 }
1052
1053 return NULL;
1054 }
1055
1056 void
1057 ir_to_mesa_visitor::visit(ir_dereference_variable *ir)
1058 {
1059 ir_to_mesa_src_reg src_reg;
1060 variable_storage *entry = find_variable_storage(ir->var);
1061 unsigned int loc;
1062
1063 if (!entry) {
1064 switch (ir->var->mode) {
1065 case ir_var_uniform:
1066 entry = get_builtin_matrix_ref(this->mem_ctx, this->prog, ir->var,
1067 NULL);
1068 if (entry)
1069 break;
1070
1071 /* FINISHME: Fix up uniform name for arrays and things */
1072 if (ir->var->type->base_type == GLSL_TYPE_SAMPLER) {
1073 /* FINISHME: we whack the location of the var here, which
1074 * is probably not expected. But we need to communicate
1075 * mesa's sampler number to the tex instruction.
1076 */
1077 int sampler = _mesa_add_sampler(this->prog->Parameters,
1078 ir->var->name,
1079 ir->var->type->gl_type);
1080 map_sampler(ir->var->location, sampler);
1081
1082 entry = new(mem_ctx) variable_storage(ir->var, PROGRAM_SAMPLER,
1083 sampler);
1084 this->variables.push_tail(entry);
1085 break;
1086 }
1087
1088 assert(ir->var->type->gl_type != 0 &&
1089 ir->var->type->gl_type != GL_INVALID_ENUM);
1090 loc = _mesa_add_uniform(this->prog->Parameters,
1091 ir->var->name,
1092 type_size(ir->var->type) * 4,
1093 ir->var->type->gl_type,
1094 NULL);
1095
1096 /* Always mark the uniform used at this point. If it isn't
1097 * used, dead code elimination should have nuked the decl already.
1098 */
1099 this->prog->Parameters->Parameters[loc].Used = GL_TRUE;
1100
1101 entry = new(mem_ctx) variable_storage(ir->var, PROGRAM_UNIFORM, loc);
1102 this->variables.push_tail(entry);
1103 break;
1104 case ir_var_in:
1105 case ir_var_out:
1106 case ir_var_inout:
1107 /* The linker assigns locations for varyings and attributes,
1108 * including deprecated builtins (like gl_Color), user-assigned
1109 * generic attributes (glBindAttribLocation), and
1110 * user-defined varyings.
1111 *
1112 * FINISHME: We would hit this path for function arguments. Fix!
1113 */
1114 assert(ir->var->location != -1);
1115 if (ir->var->mode == ir_var_in ||
1116 ir->var->mode == ir_var_inout) {
1117 entry = new(mem_ctx) variable_storage(ir->var,
1118 PROGRAM_INPUT,
1119 ir->var->location);
1120
1121 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
1122 ir->var->location >= VERT_ATTRIB_GENERIC0) {
1123 _mesa_add_attribute(prog->Attributes,
1124 ir->var->name,
1125 type_size(ir->var->type) * 4,
1126 ir->var->type->gl_type,
1127 ir->var->location - VERT_ATTRIB_GENERIC0);
1128 }
1129 } else {
1130 entry = new(mem_ctx) variable_storage(ir->var,
1131 PROGRAM_OUTPUT,
1132 ir->var->location);
1133 }
1134
1135 break;
1136 case ir_var_auto:
1137 case ir_var_temporary:
1138 entry = new(mem_ctx) variable_storage(ir->var, PROGRAM_TEMPORARY,
1139 this->next_temp);
1140 this->variables.push_tail(entry);
1141
1142 next_temp += type_size(ir->var->type);
1143 break;
1144 }
1145
1146 if (!entry) {
1147 printf("Failed to make storage for %s\n", ir->var->name);
1148 exit(1);
1149 }
1150 }
1151
1152 src_reg.file = entry->file;
1153 src_reg.index = entry->index;
1154 /* If the type is smaller than a vec4, replicate the last channel out. */
1155 if (ir->type->is_scalar() || ir->type->is_vector())
1156 src_reg.swizzle = swizzle_for_size(ir->var->type->vector_elements);
1157 else
1158 src_reg.swizzle = SWIZZLE_NOOP;
1159 src_reg.reladdr = NULL;
1160 src_reg.negate = 0;
1161
1162 this->result = src_reg;
1163 }
1164
1165 void
1166 ir_to_mesa_visitor::visit(ir_dereference_array *ir)
1167 {
1168 ir_constant *index;
1169 ir_to_mesa_src_reg src_reg;
1170 ir_dereference_variable *deref_var = ir->array->as_dereference_variable();
1171 int element_size = type_size(ir->type);
1172
1173 index = ir->array_index->constant_expression_value();
1174
1175 if (deref_var && strncmp(deref_var->var->name,
1176 "gl_TextureMatrix",
1177 strlen("gl_TextureMatrix")) == 0) {
1178 ir_to_mesa_src_reg src_reg;
1179 struct variable_storage *entry;
1180
1181 entry = get_builtin_matrix_ref(this->mem_ctx, this->prog, deref_var->var,
1182 ir->array_index);
1183 assert(entry);
1184
1185 src_reg.file = entry->file;
1186 src_reg.index = entry->index;
1187 src_reg.swizzle = swizzle_for_size(ir->type->vector_elements);
1188 src_reg.negate = 0;
1189
1190 if (index) {
1191 src_reg.reladdr = NULL;
1192 } else {
1193 ir_to_mesa_src_reg index_reg = get_temp(glsl_type::float_type);
1194
1195 ir->array_index->accept(this);
1196 ir_to_mesa_emit_op2(ir, OPCODE_MUL,
1197 ir_to_mesa_dst_reg_from_src(index_reg),
1198 this->result, src_reg_for_float(element_size));
1199
1200 src_reg.reladdr = talloc(mem_ctx, ir_to_mesa_src_reg);
1201 memcpy(src_reg.reladdr, &index_reg, sizeof(index_reg));
1202 }
1203
1204 this->result = src_reg;
1205 return;
1206 }
1207
1208 ir->array->accept(this);
1209 src_reg = this->result;
1210
1211 if (index) {
1212 src_reg.index += index->value.i[0] * element_size;
1213 } else {
1214 ir_to_mesa_src_reg array_base = this->result;
1215 /* Variable index array dereference. It eats the "vec4" of the
1216 * base of the array and an index that offsets the Mesa register
1217 * index.
1218 */
1219 ir->array_index->accept(this);
1220
1221 ir_to_mesa_src_reg index_reg;
1222
1223 if (element_size == 1) {
1224 index_reg = this->result;
1225 } else {
1226 index_reg = get_temp(glsl_type::float_type);
1227
1228 ir_to_mesa_emit_op2(ir, OPCODE_MUL,
1229 ir_to_mesa_dst_reg_from_src(index_reg),
1230 this->result, src_reg_for_float(element_size));
1231 }
1232
1233 src_reg.reladdr = talloc(mem_ctx, ir_to_mesa_src_reg);
1234 memcpy(src_reg.reladdr, &index_reg, sizeof(index_reg));
1235 }
1236
1237 /* If the type is smaller than a vec4, replicate the last channel out. */
1238 if (ir->type->is_scalar() || ir->type->is_vector())
1239 src_reg.swizzle = swizzle_for_size(ir->type->vector_elements);
1240 else
1241 src_reg.swizzle = SWIZZLE_NOOP;
1242
1243 this->result = src_reg;
1244 }
1245
1246 void
1247 ir_to_mesa_visitor::visit(ir_dereference_record *ir)
1248 {
1249 unsigned int i;
1250 const glsl_type *struct_type = ir->record->type;
1251 int offset = 0;
1252
1253 ir->record->accept(this);
1254
1255 for (i = 0; i < struct_type->length; i++) {
1256 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1257 break;
1258 offset += type_size(struct_type->fields.structure[i].type);
1259 }
1260 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1261 this->result.index += offset;
1262 }
1263
1264 /**
1265 * We want to be careful in assignment setup to hit the actual storage
1266 * instead of potentially using a temporary like we might with the
1267 * ir_dereference handler.
1268 *
1269 * Thanks to ir_swizzle_swizzle, and ir_vec_index_to_swizzle, we
1270 * should only see potentially one variable array index of a vector,
1271 * and one swizzle, before getting to actual vec4 storage. So handle
1272 * those, then go use ir_dereference to handle the rest.
1273 */
1274 static struct ir_to_mesa_dst_reg
1275 get_assignment_lhs(ir_instruction *ir, ir_to_mesa_visitor *v,
1276 ir_to_mesa_src_reg *r)
1277 {
1278 struct ir_to_mesa_dst_reg dst_reg;
1279 ir_swizzle *swiz;
1280
1281 ir_dereference_array *deref_array = ir->as_dereference_array();
1282 /* This should have been handled by ir_vec_index_to_cond_assign */
1283 if (deref_array) {
1284 assert(!deref_array->array->type->is_vector());
1285 }
1286
1287 /* Use the rvalue deref handler for the most part. We'll ignore
1288 * swizzles in it and write swizzles using writemask, though.
1289 */
1290 ir->accept(v);
1291 dst_reg = ir_to_mesa_dst_reg_from_src(v->result);
1292
1293 if ((swiz = ir->as_swizzle())) {
1294 int swizzles[4] = {
1295 swiz->mask.x,
1296 swiz->mask.y,
1297 swiz->mask.z,
1298 swiz->mask.w
1299 };
1300 int new_r_swizzle[4];
1301 int orig_r_swizzle = r->swizzle;
1302 int i;
1303
1304 for (i = 0; i < 4; i++) {
1305 new_r_swizzle[i] = GET_SWZ(orig_r_swizzle, 0);
1306 }
1307
1308 dst_reg.writemask = 0;
1309 for (i = 0; i < 4; i++) {
1310 if (i < swiz->mask.num_components) {
1311 dst_reg.writemask |= 1 << swizzles[i];
1312 new_r_swizzle[swizzles[i]] = GET_SWZ(orig_r_swizzle, i);
1313 }
1314 }
1315
1316 r->swizzle = MAKE_SWIZZLE4(new_r_swizzle[0],
1317 new_r_swizzle[1],
1318 new_r_swizzle[2],
1319 new_r_swizzle[3]);
1320 }
1321
1322 return dst_reg;
1323 }
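/* For example, for an assignment like "v.zx = e" the writemask becomes XZ,
 * and the right-hand side's swizzle is remapped so that e's first component
 * lands in z and its second in x; the MOV/CMP emitted for the ir_assignment
 * then writes only those channels.
 */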
1324
1325 void
1326 ir_to_mesa_visitor::visit(ir_assignment *ir)
1327 {
1328 struct ir_to_mesa_dst_reg l;
1329 struct ir_to_mesa_src_reg r;
1330 int i;
1331
1332 assert(!ir->lhs->type->is_array());
1333
1334 ir->rhs->accept(this);
1335 r = this->result;
1336
1337 l = get_assignment_lhs(ir->lhs, this, &r);
1338
1339 assert(l.file != PROGRAM_UNDEFINED);
1340 assert(r.file != PROGRAM_UNDEFINED);
1341
1342 if (ir->condition) {
1343 ir_to_mesa_src_reg condition;
1344
1345 ir->condition->accept(this);
1346 condition = this->result;
1347
1348 /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves,
1349 * and the condition we produced is 0.0 or 1.0. By flipping the
1350 * sign, we can choose which value OPCODE_CMP produces without
1351 * an extra instruction computing the condition.
1352 */
1353 condition.negate = ~condition.negate;
1354 for (i = 0; i < type_size(ir->lhs->type); i++) {
1355 ir_to_mesa_emit_op3(ir, OPCODE_CMP, l,
1356 condition, r, ir_to_mesa_src_reg_from_dst(l));
1357 l.index++;
1358 r.index++;
1359 }
1360 } else {
1361 for (i = 0; i < type_size(ir->lhs->type); i++) {
1362 ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r);
1363 l.index++;
1364 r.index++;
1365 }
1366 }
1367 }
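/* So an ir_assignment with a condition, e.g. a lowered "if (c) x = y;",
 * ends up as roughly
 *
 *    CMP x, -c, y, x
 *
 * with c being 1.0 or 0.0: negating it makes the "source < 0" operand of
 * CMP select y when the condition held and keep x's old value otherwise.
 */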
1368
1369
1370 void
1371 ir_to_mesa_visitor::visit(ir_constant *ir)
1372 {
1373 ir_to_mesa_src_reg src_reg;
1374 GLfloat stack_vals[4];
1375 GLfloat *values = stack_vals;
1376 unsigned int i;
1377
1378 if (ir->type->is_array()) {
1379 ir->print();
1380 printf("\n");
1381 assert(!"FINISHME: array constants");
1382 }
1383
1384 if (ir->type->is_matrix()) {
1385 /* Unfortunately, 4 floats is all we can get into
1386 * _mesa_add_unnamed_constant. So, make a temp to store the
1387 * matrix and move each constant value into it. If we get
1388 * lucky, copy propagation will eliminate the extra moves.
1389 */
1390 ir_to_mesa_src_reg mat = get_temp(glsl_type::vec4_type);
1391 ir_to_mesa_dst_reg mat_column = ir_to_mesa_dst_reg_from_src(mat);
1392
1393 for (i = 0; i < ir->type->matrix_columns; i++) {
1394 src_reg.file = PROGRAM_CONSTANT;
1395
1396 assert(ir->type->base_type == GLSL_TYPE_FLOAT);
1397 values = &ir->value.f[i * ir->type->vector_elements];
1398
1399 src_reg.index = _mesa_add_unnamed_constant(this->prog->Parameters,
1400 values,
1401 ir->type->vector_elements,
1402 &src_reg.swizzle);
1403 src_reg.reladdr = NULL;
1404 src_reg.negate = 0;
1405 ir_to_mesa_emit_op1(ir, OPCODE_MOV, mat_column, src_reg);
1406
1407 mat_column.index++;
1408 }
1409
1410 this->result = mat;
return; /* the matrix result is complete; don't fall into the scalar path below */
1411 }
1412
1413 src_reg.file = PROGRAM_CONSTANT;
1414 switch (ir->type->base_type) {
1415 case GLSL_TYPE_FLOAT:
1416 values = &ir->value.f[0];
1417 break;
1418 case GLSL_TYPE_UINT:
1419 for (i = 0; i < ir->type->vector_elements; i++) {
1420 values[i] = ir->value.u[i];
1421 }
1422 break;
1423 case GLSL_TYPE_INT:
1424 for (i = 0; i < ir->type->vector_elements; i++) {
1425 values[i] = ir->value.i[i];
1426 }
1427 break;
1428 case GLSL_TYPE_BOOL:
1429 for (i = 0; i < ir->type->vector_elements; i++) {
1430 values[i] = ir->value.b[i];
1431 }
1432 break;
1433 default:
1434 assert(!"Non-float/uint/int/bool constant");
1435 }
1436
1437 src_reg.index = _mesa_add_unnamed_constant(this->prog->Parameters,
1438 values, ir->type->vector_elements,
1439 &src_reg.swizzle);
1440 src_reg.reladdr = NULL;
1441 src_reg.negate = 0;
1442
1443 this->result = src_reg;
1444 }
1445
1446 function_entry *
1447 ir_to_mesa_visitor::get_function_signature(ir_function_signature *sig)
1448 {
1449 function_entry *entry;
1450
1451 foreach_iter(exec_list_iterator, iter, this->function_signatures) {
1452 entry = (function_entry *)iter.get();
1453
1454 if (entry->sig == sig)
1455 return entry;
1456 }
1457
1458 entry = talloc(mem_ctx, function_entry);
1459 entry->sig = sig;
1460 entry->sig_id = this->next_signature_id++;
1461 entry->bgn_inst = NULL;
1462
1463 /* Allocate storage for all the parameters. */
1464 foreach_iter(exec_list_iterator, iter, sig->parameters) {
1465 ir_variable *param = (ir_variable *)iter.get();
1466 variable_storage *storage;
1467
1468 storage = find_variable_storage(param);
1469 assert(!storage);
1470
1471 storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY,
1472 this->next_temp);
1473 this->variables.push_tail(storage);
1474
1475 this->next_temp += type_size(param->type);
1477 }
1478
1479 if (sig->return_type) {
1480 entry->return_reg = get_temp(sig->return_type);
1481 } else {
1482 entry->return_reg = ir_to_mesa_undef;
1483 }
1484
1485 this->function_signatures.push_tail(entry);
1486 return entry;
1487 }
1488
1489 void
1490 ir_to_mesa_visitor::visit(ir_call *ir)
1491 {
1492 ir_to_mesa_instruction *call_inst;
1493 ir_function_signature *sig = ir->get_callee();
1494 function_entry *entry = get_function_signature(sig);
1495 int i;
1496
1497 /* Process in parameters. */
1498 exec_list_iterator sig_iter = sig->parameters.iterator();
1499 foreach_iter(exec_list_iterator, iter, *ir) {
1500 ir_rvalue *param_rval = (ir_rvalue *)iter.get();
1501 ir_variable *param = (ir_variable *)sig_iter.get();
1502
1503 if (param->mode == ir_var_in ||
1504 param->mode == ir_var_inout) {
1505 variable_storage *storage = find_variable_storage(param);
1506 assert(storage);
1507
1508 param_rval->accept(this);
1509 ir_to_mesa_src_reg r = this->result;
1510
1511 ir_to_mesa_dst_reg l;
1512 l.file = storage->file;
1513 l.index = storage->index;
1514 l.reladdr = NULL;
1515 l.writemask = WRITEMASK_XYZW;
1516 l.cond_mask = COND_TR;
1517
1518 for (i = 0; i < type_size(param->type); i++) {
1519 ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r);
1520 l.index++;
1521 r.index++;
1522 }
1523 }
1524
1525 sig_iter.next();
1526 }
1527 assert(!sig_iter.has_next());
1528
1529 /* Emit call instruction */
1530 call_inst = ir_to_mesa_emit_op1(ir, OPCODE_CAL,
1531 ir_to_mesa_undef_dst, ir_to_mesa_undef);
1532 call_inst->function = entry;
1533
1534 /* Process out parameters. */
1535 sig_iter = sig->parameters.iterator();
1536 foreach_iter(exec_list_iterator, iter, *ir) {
1537 ir_rvalue *param_rval = (ir_rvalue *)iter.get();
1538 ir_variable *param = (ir_variable *)sig_iter.get();
1539
1540 if (param->mode == ir_var_out ||
1541 param->mode == ir_var_inout) {
1542 variable_storage *storage = find_variable_storage(param);
1543 assert(storage);
1544
1545 ir_to_mesa_src_reg r;
1546 r.file = storage->file;
1547 r.index = storage->index;
1548 r.reladdr = NULL;
1549 r.swizzle = SWIZZLE_NOOP;
1550 r.negate = 0;
1551
1552 param_rval->accept(this);
1553 ir_to_mesa_dst_reg l = ir_to_mesa_dst_reg_from_src(this->result);
1554
1555 for (i = 0; i < type_size(param->type); i++) {
1556 ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r);
1557 l.index++;
1558 r.index++;
1559 }
1560 }
1561
1562 sig_iter.next();
1563 }
1564 assert(!sig_iter.has_next());
1565
1566 /* Process return value. */
1567 this->result = entry->return_reg;
1568 }
1569
1570
1571 void
1572 ir_to_mesa_visitor::visit(ir_texture *ir)
1573 {
1574 ir_to_mesa_src_reg result_src, coord, lod_info = { 0 }, projector;
1575 ir_to_mesa_dst_reg result_dst, coord_dst;
1576 ir_to_mesa_instruction *inst = NULL;
1577 prog_opcode opcode = OPCODE_NOP;
1578
1579 ir->coordinate->accept(this);
1580
1581 /* Put our coords in a temp. We'll need to modify them for shadow,
1582 * projection, or LOD, so the only case we'd use it as-is is if
1583 * we're doing plain old texturing. Mesa IR optimization should
1584 * handle cleaning up our mess in that case.
1585 */
1586 coord = get_temp(glsl_type::vec4_type);
1587 coord_dst = ir_to_mesa_dst_reg_from_src(coord);
1588 ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst,
1589 this->result);
1590
1591 if (ir->projector) {
1592 ir->projector->accept(this);
1593 projector = this->result;
1594 }
1595
1596 /* Storage for our result. Ideally for an assignment we'd be using
1597 * the actual storage for the result here, instead.
1598 */
1599 result_src = get_temp(glsl_type::vec4_type);
1600 result_dst = ir_to_mesa_dst_reg_from_src(result_src);
1601
1602 switch (ir->op) {
1603 case ir_tex:
1604 opcode = OPCODE_TEX;
1605 break;
1606 case ir_txb:
1607 opcode = OPCODE_TXB;
1608 ir->lod_info.bias->accept(this);
1609 lod_info = this->result;
1610 break;
1611 case ir_txl:
1612 opcode = OPCODE_TXL;
1613 ir->lod_info.lod->accept(this);
1614 lod_info = this->result;
1615 break;
1616 case ir_txd:
1617 case ir_txf:
1618 assert(!"GLSL 1.30 features unsupported");
1619 break;
1620 }
1621
1622 if (ir->projector) {
1623 if (opcode == OPCODE_TEX) {
1624 /* Slot the projector in as the last component of the coord. */
1625 coord_dst.writemask = WRITEMASK_W;
1626 ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, projector);
1627 coord_dst.writemask = WRITEMASK_XYZW;
1628 opcode = OPCODE_TXP;
1629 } else {
1630 ir_to_mesa_src_reg coord_w = coord;
1631 coord_w.swizzle = SWIZZLE_WWWW;
1632
1633 /* For the other TEX opcodes there's no projective version
1634 * since the last slot is taken up by lod info. Do the
1635 * projective divide now.
1636 */
1637 coord_dst.writemask = WRITEMASK_W;
1638 ir_to_mesa_emit_op1(ir, OPCODE_RCP, coord_dst, projector);
1639
1640 coord_dst.writemask = WRITEMASK_XYZ;
1641 ir_to_mesa_emit_op2(ir, OPCODE_MUL, coord_dst, coord, coord_w);
1642
1643 coord_dst.writemask = WRITEMASK_XYZW;
1644 coord.swizzle = SWIZZLE_XYZW;
1645 }
1646 }
1647
1648 if (ir->shadow_comparitor) {
1649 /* Slot the shadow value in as the second to last component of the
1650 * coord.
1651 */
1652 ir->shadow_comparitor->accept(this);
1653 coord_dst.writemask = WRITEMASK_Z;
1654 ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, this->result);
1655 coord_dst.writemask = WRITEMASK_XYZW;
1656 }
1657
1658 if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) {
1659 /* Mesa IR stores lod or lod bias in the last channel of the coords. */
1660 coord_dst.writemask = WRITEMASK_W;
1661 ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, lod_info);
1662 coord_dst.writemask = WRITEMASK_XYZW;
1663 }
1664
1665 inst = ir_to_mesa_emit_op1(ir, opcode, result_dst, coord);
1666
1667 if (ir->shadow_comparitor)
1668 inst->tex_shadow = GL_TRUE;
1669
1670 ir_dereference_variable *sampler = ir->sampler->as_dereference_variable();
1671 assert(sampler); /* FINISHME: sampler arrays */
1672 /* Generate the location-to-sampler mapping; remove this once storage is
1673 * generated at declaration time
1674 */
1675 sampler->accept(this);
1676
1677 inst->sampler = get_sampler_number(sampler->var->location);
1678
1679 switch (sampler->type->sampler_dimensionality) {
1680 case GLSL_SAMPLER_DIM_1D:
1681 inst->tex_target = TEXTURE_1D_INDEX;
1682 break;
1683 case GLSL_SAMPLER_DIM_2D:
1684 inst->tex_target = TEXTURE_2D_INDEX;
1685 break;
1686 case GLSL_SAMPLER_DIM_3D:
1687 inst->tex_target = TEXTURE_3D_INDEX;
1688 break;
1689 case GLSL_SAMPLER_DIM_CUBE:
1690 inst->tex_target = TEXTURE_CUBE_INDEX;
1691 break;
1692 default:
1693 assert(!"FINISHME: other texture targets");
1694 }
1695
1696 this->result = result_src;
1697 }
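/* The coordinate temp built above ends up packed roughly as: .xyz carries
 * the texture coordinate (already divided by the projector for the non-TEX
 * opcodes), .z carries the shadow comparison value for shadow samplers,
 * and .w carries the projector (TXP) or the lod/bias (TXL/TXB), which is
 * the layout the Mesa TEX-family instructions expect.
 */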
1698
1699 void
1700 ir_to_mesa_visitor::visit(ir_return *ir)
1701 {
1702 assert(current_function);
1703
1704 if (ir->get_value()) {
1705 ir_to_mesa_dst_reg l;
1706 int i;
1707
1708 ir->get_value()->accept(this);
1709 ir_to_mesa_src_reg r = this->result;
1710
1711 l = ir_to_mesa_dst_reg_from_src(current_function->return_reg);
1712
1713 for (i = 0; i < type_size(current_function->sig->return_type); i++) {
1714 ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r);
1715 l.index++;
1716 r.index++;
1717 }
1718 }
1719
1720 ir_to_mesa_emit_op0(ir, OPCODE_RET);
1721 }
1722
1723 void
1724 ir_to_mesa_visitor::visit(ir_discard *ir)
1725 {
1726 assert(ir->condition == NULL); /* FINISHME */
1727
1728 ir_to_mesa_emit_op0(ir, OPCODE_KIL_NV);
1729 }
1730
1731 void
1732 ir_to_mesa_visitor::visit(ir_if *ir)
1733 {
1734 ir_to_mesa_instruction *cond_inst, *if_inst, *else_inst = NULL;
1735 ir_to_mesa_instruction *prev_inst;
1736
1737 prev_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
1738
1739 ir->condition->accept(this);
1740 assert(this->result.file != PROGRAM_UNDEFINED);
1741
1742 if (ctx->Shader.EmitCondCodes) {
1743 cond_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
1744
1745 /* See if we actually generated any instruction for generating
1746 * the condition. If not, then cook up a move to a temp so we
1747 * have something to set cond_update on.
1748 */
1749 if (cond_inst == prev_inst) {
1750 ir_to_mesa_src_reg temp = get_temp(glsl_type::bool_type);
1751 cond_inst = ir_to_mesa_emit_op1(ir->condition, OPCODE_MOV,
1752 ir_to_mesa_dst_reg_from_src(temp),
1753 result);
1754 }
1755 cond_inst->cond_update = GL_TRUE;
1756
1757 if_inst = ir_to_mesa_emit_op0(ir->condition, OPCODE_IF);
1758 if_inst->dst_reg.cond_mask = COND_NE;
1759 } else {
1760 if_inst = ir_to_mesa_emit_op1(ir->condition,
1761 OPCODE_IF, ir_to_mesa_undef_dst,
1762 this->result);
1763 }
1764
1765 /* Note: if_inst was already added to this->instructions by the emit helpers. */
1766
1767 visit_exec_list(&ir->then_instructions, this);
1768
1769 if (!ir->else_instructions.is_empty()) {
1770 else_inst = ir_to_mesa_emit_op0(ir->condition, OPCODE_ELSE);
1771 visit_exec_list(&ir->else_instructions, this);
1772 }
1773
1774 if_inst = ir_to_mesa_emit_op1(ir->condition, OPCODE_ENDIF,
1775 ir_to_mesa_undef_dst, ir_to_mesa_undef);
1776 }
1777
1778 ir_to_mesa_visitor::ir_to_mesa_visitor()
1779 {
1780 result.file = PROGRAM_UNDEFINED;
1781 next_temp = 1;
1782 next_signature_id = 1;
1783 sampler_map = NULL;
1784 sampler_map_size = 0;
1785 current_function = NULL;
1786 }
1787
1788 static struct prog_src_register
1789 mesa_src_reg_from_ir_src_reg(ir_to_mesa_src_reg reg)
1790 {
1791 struct prog_src_register mesa_reg;
1792
1793 mesa_reg.File = reg.file;
1794 assert(reg.index < (1 << INST_INDEX_BITS) - 1);
1795 mesa_reg.Index = reg.index;
1796 mesa_reg.Swizzle = reg.swizzle;
1797 mesa_reg.RelAddr = reg.reladdr != NULL;
1798 mesa_reg.Negate = reg.negate;
1799 mesa_reg.Abs = 0;
1800
1801 return mesa_reg;
1802 }
1803
1804 static void
1805 set_branchtargets(ir_to_mesa_visitor *v,
1806 struct prog_instruction *mesa_instructions,
1807 int num_instructions)
1808 {
1809 int if_count = 0, loop_count = 0;
1810 int *if_stack, *loop_stack;
1811 int if_stack_pos = 0, loop_stack_pos = 0;
1812 int i, j;
1813
1814 for (i = 0; i < num_instructions; i++) {
1815 switch (mesa_instructions[i].Opcode) {
1816 case OPCODE_IF:
1817 if_count++;
1818 break;
1819 case OPCODE_BGNLOOP:
1820 loop_count++;
1821 break;
1822 case OPCODE_BRK:
1823 case OPCODE_CONT:
1824 mesa_instructions[i].BranchTarget = -1;
1825 break;
1826 default:
1827 break;
1828 }
1829 }
1830
1831 if_stack = (int *)calloc(if_count, sizeof(*if_stack));
1832 loop_stack = (int *)calloc(loop_count, sizeof(*loop_stack));
1833
1834 for (i = 0; i < num_instructions; i++) {
1835 switch (mesa_instructions[i].Opcode) {
1836 case OPCODE_IF:
1837 if_stack[if_stack_pos] = i;
1838 if_stack_pos++;
1839 break;
1840 case OPCODE_ELSE:
1841 mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
1842 if_stack[if_stack_pos - 1] = i;
1843 break;
1844 case OPCODE_ENDIF:
1845 mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
1846 if_stack_pos--;
1847 break;
1848 case OPCODE_BGNLOOP:
1849 loop_stack[loop_stack_pos] = i;
1850 loop_stack_pos++;
1851 break;
1852 case OPCODE_ENDLOOP:
1853 loop_stack_pos--;
1854 /* Rewrite any breaks/conts at this nesting level that haven't
1855 * already had a BranchTarget assigned, to point to the end
1856 * of the loop.
1857 */
1858 for (j = loop_stack[loop_stack_pos]; j < i; j++) {
1859 if (mesa_instructions[j].Opcode == OPCODE_BRK ||
1860 mesa_instructions[j].Opcode == OPCODE_CONT) {
1861 if (mesa_instructions[j].BranchTarget == -1) {
1862 mesa_instructions[j].BranchTarget = i;
1863 }
1864 }
1865 }
1866 /* The loop ends point at each other. */
1867 mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos];
1868 mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i;
1869 break;
1870 case OPCODE_CAL:
1871 foreach_iter(exec_list_iterator, iter, v->function_signatures) {
1872 function_entry *entry = (function_entry *)iter.get();
1873
1874 if (entry->sig_id == mesa_instructions[i].BranchTarget) {
1875 mesa_instructions[i].BranchTarget = entry->inst;
1876 break;
1877 }
1878 }
1879 break;
1880 default:
1881 break;
1882 }
1883 }
1884
1885 free(if_stack);
free(loop_stack);
1886 }
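/* As an example, a hypothetical if/else with a break inside a loop gets its
 * BranchTargets filled in like
 *
 *    0: BGNLOOP -> 5
 *    1: IF      -> 3
 *    2: BRK     -> 5
 *    3: ELSE    -> 4
 *    4: ENDIF
 *    5: ENDLOOP -> 0
 *
 * and each CAL has its placeholder sig_id replaced by the instruction index
 * recorded for the called function's body.
 */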
1887
1888 static void
1889 print_program(struct prog_instruction *mesa_instructions,
1890 ir_instruction **mesa_instruction_annotation,
1891 int num_instructions)
1892 {
1893 ir_instruction *last_ir = NULL;
1894 int i;
1895 int indent = 0;
1896
1897 for (i = 0; i < num_instructions; i++) {
1898 struct prog_instruction *mesa_inst = mesa_instructions + i;
1899 ir_instruction *ir = mesa_instruction_annotation[i];
1900
1901 fprintf(stdout, "%3d: ", i);
1902
1903 if (last_ir != ir && ir) {
1904 int j;
1905
1906 for (j = 0; j < indent; j++) {
1907 fprintf(stdout, " ");
1908 }
1909 ir->print();
1910 printf("\n");
1911 last_ir = ir;
1912
1913 fprintf(stdout, " "); /* line number spacing. */
1914 }
1915
1916 indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent,
1917 PROG_PRINT_DEBUG, NULL);
1918 }
1919 }
1920
1921 static void
1922 mark_input(struct gl_program *prog,
1923 int index,
1924 GLboolean reladdr)
1925 {
1926 prog->InputsRead |= BITFIELD64_BIT(index);
1927 int i;
1928
1929 if (reladdr) {
1930 if (index >= FRAG_ATTRIB_TEX0 && index <= FRAG_ATTRIB_TEX7) {
1931 for (i = 0; i < 8; i++) {
1932 prog->InputsRead |= BITFIELD64_BIT(FRAG_ATTRIB_TEX0 + i);
1933 }
1934 } else {
1935 assert(!"FINISHME: Mark InputsRead for varying arrays");
1936 }
1937 }
1938 }
1939
1940 static void
1941 mark_output(struct gl_program *prog,
1942 int index,
1943 GLboolean reladdr)
1944 {
1945 prog->OutputsWritten |= BITFIELD64_BIT(index);
1946 int i;
1947
1948 if (reladdr) {
1949 if (index >= VERT_RESULT_TEX0 && index <= VERT_RESULT_TEX7) {
1950 for (i = 0; i < 8; i++) {
1951             prog->OutputsWritten |= BITFIELD64_BIT(VERT_RESULT_TEX0 + i);
1952 }
1953 } else {
1954 assert(!"FINISHME: Mark OutputsWritten for varying arrays");
1955 }
1956 }
1957 }
1958
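/**
 * Recomputes the program's InputsRead, OutputsWritten, SamplersUsed and
 * shadow-sampler state by walking its instruction list.
 */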
1959 static void
1960 count_resources(struct gl_program *prog)
1961 {
1962 unsigned int i;
1963
1964 prog->InputsRead = 0;
1965 prog->OutputsWritten = 0;
1966 prog->SamplersUsed = 0;
1967
1968 for (i = 0; i < prog->NumInstructions; i++) {
1969 struct prog_instruction *inst = &prog->Instructions[i];
1970 unsigned int reg;
1971
1972 switch (inst->DstReg.File) {
1973 case PROGRAM_OUTPUT:
1974 mark_output(prog, inst->DstReg.Index, inst->DstReg.RelAddr);
1975 break;
1976 case PROGRAM_INPUT:
1977 mark_input(prog, inst->DstReg.Index, inst->DstReg.RelAddr);
1978 break;
1979 default:
1980 break;
1981 }
1982
1983 for (reg = 0; reg < _mesa_num_inst_src_regs(inst->Opcode); reg++) {
1984 switch (inst->SrcReg[reg].File) {
1985 case PROGRAM_OUTPUT:
1986 mark_output(prog, inst->SrcReg[reg].Index,
1987 inst->SrcReg[reg].RelAddr);
1988 break;
1989 case PROGRAM_INPUT:
1990 mark_input(prog, inst->SrcReg[reg].Index, inst->SrcReg[reg].RelAddr);
1991 break;
1992 default:
1993 break;
1994 }
1995 }
1996
1997       /* Instead of just using the uniform's value to map to a
1998        * sampler, Mesa first allocates a separate number for the
1999        * sampler (_mesa_add_sampler).  That number is reindexed down to
2000        * a small integer (sampler_map[], SamplersUsed), which is then
2001        * mapped through the uniform's value to reach the actual sampler.
2002        */
2003 if (_mesa_is_tex_instruction(inst->Opcode)) {
2004 prog->SamplerTargets[inst->TexSrcUnit] =
2005 (gl_texture_index)inst->TexSrcTarget;
2006 prog->SamplersUsed |= 1 << inst->TexSrcUnit;
2007 if (inst->TexShadow) {
2008 prog->ShadowSamplers |= 1 << inst->TexSrcUnit;
2009 }
2010 }
2011 }
2012
2013 _mesa_update_shader_textures_used(prog);
2014 }
2015
2016 /* Each stage has some uniforms in its Parameters list.  The Uniforms
2017  * list for the linked shader program holds a pointer to each of these
2018  * per-stage parameters, so that a stage's copy of a uniform's value
2019  * can be updated whenever the application sets that uniform.
2020  */
2021 static void
2022 link_uniforms_to_shared_uniform_list(struct gl_uniform_list *uniforms,
2023 struct gl_program *prog)
2024 {
2025 unsigned int i;
2026
2027 for (i = 0; i < prog->Parameters->NumParameters; i++) {
2028 const struct gl_program_parameter *p = prog->Parameters->Parameters + i;
2029
2030 if (p->Type == PROGRAM_UNIFORM || p->Type == PROGRAM_SAMPLER) {
2031 struct gl_uniform *uniform =
2032 _mesa_append_uniform(uniforms, p->Name, prog->Target, i);
2033 if (uniform)
2034 uniform->Initialized = p->Initialized;
2035 }
2036 }
2037 }
2038
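/**
 * Translates one linked GLSL shader into a Mesa gl_program: emit Mesa IR
 * for main() and for the body of every function that is actually called,
 * then convert the ir_to_mesa_instruction list into prog_instructions and
 * resolve branch targets.
 */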
2039 struct gl_program *
2040 get_mesa_program(GLcontext *ctx, struct gl_shader_program *shader_program,
2041 struct gl_shader *shader)
2042 {
2043 void *mem_ctx = shader_program;
2044 ir_to_mesa_visitor v;
2045 struct prog_instruction *mesa_instructions, *mesa_inst;
2046 ir_instruction **mesa_instruction_annotation;
2047 int i;
2048 struct gl_program *prog;
2049 GLenum target;
2050 const char *target_string;
2051 GLboolean progress;
2052
2053 switch (shader->Type) {
2054 case GL_VERTEX_SHADER:
2055 target = GL_VERTEX_PROGRAM_ARB;
2056 target_string = "vertex";
2057 break;
2058 case GL_FRAGMENT_SHADER:
2059 target = GL_FRAGMENT_PROGRAM_ARB;
2060 target_string = "fragment";
2061 break;
2062 default:
2063 assert(!"should not be reached");
2064       return NULL;
2065 }
2066
2067 validate_ir_tree(shader->ir);
2068
2069 prog = ctx->Driver.NewProgram(ctx, target, 1);
2070 if (!prog)
2071 return NULL;
2072 prog->Parameters = _mesa_new_parameter_list();
2073 prog->Varying = _mesa_new_parameter_list();
2074 prog->Attributes = _mesa_new_parameter_list();
2075 v.ctx = ctx;
2076 v.prog = prog;
2077
2078 v.mem_ctx = talloc_new(NULL);
2079
2080 /* Emit Mesa IR for main(). */
2081 visit_exec_list(shader->ir, &v);
2082 v.ir_to_mesa_emit_op0(NULL, OPCODE_END);
2083
2084 /* Now emit bodies for any functions that were used. */
2085 do {
2086 progress = GL_FALSE;
2087
2088 foreach_iter(exec_list_iterator, iter, v.function_signatures) {
2089 function_entry *entry = (function_entry *)iter.get();
2090
2091 if (!entry->bgn_inst) {
2092 v.current_function = entry;
2093
2094 entry->bgn_inst = v.ir_to_mesa_emit_op0(NULL, OPCODE_BGNSUB);
2095 entry->bgn_inst->function = entry;
2096
2097 visit_exec_list(&entry->sig->body, &v);
2098
             /* Leave entry->bgn_inst pointing at the OPCODE_BGNSUB emitted
              * above; these instructions just close out the subroutine.
              */
2099          v.ir_to_mesa_emit_op0(NULL, OPCODE_RET);
2100          v.ir_to_mesa_emit_op0(NULL, OPCODE_ENDSUB);
2101 progress = GL_TRUE;
2102 }
2103 }
2104 } while (progress);
2105
2106 prog->NumTemporaries = v.next_temp;
2107
2108 int num_instructions = 0;
2109 foreach_iter(exec_list_iterator, iter, v.instructions) {
2110 num_instructions++;
2111 }
2112
2113 mesa_instructions =
2114 (struct prog_instruction *)calloc(num_instructions,
2115 sizeof(*mesa_instructions));
2116 mesa_instruction_annotation = talloc_array(mem_ctx, ir_instruction *,
2117 num_instructions);
2118
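   /* Convert the ir_to_mesa_instruction list into Mesa prog_instructions,
    * copying register, condition, and texture state for each one.
    */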
2119 mesa_inst = mesa_instructions;
2120 i = 0;
2121 foreach_iter(exec_list_iterator, iter, v.instructions) {
2122 ir_to_mesa_instruction *inst = (ir_to_mesa_instruction *)iter.get();
2123
2124 mesa_inst->Opcode = inst->op;
2125 mesa_inst->CondUpdate = inst->cond_update;
2126 mesa_inst->DstReg.File = inst->dst_reg.file;
2127 mesa_inst->DstReg.Index = inst->dst_reg.index;
2128 mesa_inst->DstReg.CondMask = inst->dst_reg.cond_mask;
2129 mesa_inst->DstReg.WriteMask = inst->dst_reg.writemask;
2130 mesa_inst->DstReg.RelAddr = inst->dst_reg.reladdr != NULL;
2131 mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src_reg[0]);
2132 mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src_reg[1]);
2133 mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src_reg[2]);
2134 mesa_inst->TexSrcUnit = inst->sampler;
2135 mesa_inst->TexSrcTarget = inst->tex_target;
2136 mesa_inst->TexShadow = inst->tex_shadow;
2137 mesa_instruction_annotation[i] = inst->ir;
2138
2139 if (ctx->Shader.EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) {
2140 shader_program->InfoLog =
2141 talloc_asprintf_append(shader_program->InfoLog,
2142 "Couldn't flatten if statement\n");
2143 shader_program->LinkStatus = false;
2144 }
2145
2146 if (mesa_inst->Opcode == OPCODE_BGNSUB)
2147 inst->function->inst = i;
2148 else if (mesa_inst->Opcode == OPCODE_CAL)
2149 mesa_inst->BranchTarget = inst->function->sig_id; /* rewritten later */
2150 else if (mesa_inst->Opcode == OPCODE_ARL)
2151 prog->NumAddressRegs = 1;
2152
2153 mesa_inst++;
2154 i++;
2155 }
2156
2157 set_branchtargets(&v, mesa_instructions, num_instructions);
2158 if (ctx->Shader.Flags & GLSL_DUMP) {
2159 printf("Mesa %s program:\n", target_string);
2160 print_program(mesa_instructions, mesa_instruction_annotation,
2161 num_instructions);
2162 }
2163
2164 prog->Instructions = mesa_instructions;
2165 prog->NumInstructions = num_instructions;
2166
2167 _mesa_reference_program(ctx, &shader->Program, prog);
2168
2169 if ((ctx->Shader.Flags & GLSL_NO_OPT) == 0) {
2170 _mesa_optimize_program(ctx, prog);
2171 }
2172
2173 return prog;
2174 }
2175
2176 extern "C" {
2177
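/**
 * Compiles a GLSL shader: preprocess and parse the source, convert the AST
 * to GLSL IR, then run the lowering and optimization passes that the Mesa
 * IR backend relies on.
 */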
2178 void
2179 _mesa_glsl_compile_shader(GLcontext *ctx, struct gl_shader *shader)
2180 {
2181 struct _mesa_glsl_parse_state *state =
2182 new(shader) _mesa_glsl_parse_state(ctx, shader->Type, shader);
2183
2184 const char *source = shader->Source;
2185 state->error = preprocess(state, &source, &state->info_log,
2186 &ctx->Extensions);
2187
2188 if (!state->error) {
2189 _mesa_glsl_lexer_ctor(state, source);
2190 _mesa_glsl_parse(state);
2191 _mesa_glsl_lexer_dtor(state);
2192 }
2193
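   /* Convert the parsed AST into GLSL IR. */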
2194 shader->ir = new(shader) exec_list;
2195 if (!state->error && !state->translation_unit.is_empty())
2196 _mesa_ast_to_hir(shader->ir, state);
2197
2198 if (!state->error && !shader->ir->is_empty()) {
2199 validate_ir_tree(shader->ir);
2200
2201 /* Lowering */
2202 do_mat_op_to_vec(shader->ir);
2203 do_mod_to_fract(shader->ir);
2204 do_div_to_mul_rcp(shader->ir);
2205
2206 /* Optimization passes */
2207 bool progress;
2208 do {
2209 progress = false;
2210
2211 progress = do_function_inlining(shader->ir) || progress;
2212 progress = do_if_simplification(shader->ir) || progress;
2213 progress = do_copy_propagation(shader->ir) || progress;
2214 progress = do_dead_code_local(shader->ir) || progress;
2215 progress = do_dead_code_unlinked(state, shader->ir) || progress;
2216 progress = do_constant_variable_unlinked(shader->ir) || progress;
2217 progress = do_constant_folding(shader->ir) || progress;
2218 progress = do_if_return(shader->ir) || progress;
2219 if (ctx->Shader.EmitNoIfs)
2220 progress = do_if_to_cond_assign(shader->ir) || progress;
2221
2222 progress = do_vec_index_to_swizzle(shader->ir) || progress;
2223 /* Do this one after the previous to let the easier pass handle
2224 * constant vector indexing.
2225 */
2226 progress = do_vec_index_to_cond_assign(shader->ir) || progress;
2227
2228 progress = do_swizzle_swizzle(shader->ir) || progress;
2229 } while (progress);
2230
2231 validate_ir_tree(shader->ir);
2232 }
2233
2234 shader->symbols = state->symbols;
2235
2236 shader->CompileStatus = !state->error;
2237 shader->InfoLog = state->info_log;
2238 shader->Version = state->language_version;
2239 memcpy(shader->builtins_to_link, state->builtins_to_link,
2240 sizeof(shader->builtins_to_link[0]) * state->num_builtins_to_link);
2241 shader->num_builtins_to_link = state->num_builtins_to_link;
2242
2243 /* Retain any live IR, but trash the rest. */
2244 reparent_ir(shader->ir, shader);
2245
2246 talloc_free(state);
2247 }
2248
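/**
 * Links a GLSL shader program: run the GLSL linker over the compiled
 * shaders, then generate a Mesa gl_program for each linked stage and
 * attach it (along with its uniforms) to the program object.
 */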
2249 void
2250 _mesa_glsl_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
2251 {
2252 unsigned int i;
2253
2254 _mesa_clear_shader_program_data(ctx, prog);
2255
2256 prog->LinkStatus = GL_TRUE;
2257
2258 for (i = 0; i < prog->NumShaders; i++) {
2259 if (!prog->Shaders[i]->CompileStatus) {
2260 prog->InfoLog =
2261 talloc_asprintf_append(prog->InfoLog,
2262 "linking with uncompiled shader");
2263 prog->LinkStatus = GL_FALSE;
2264 }
2265 }
2266
2267 prog->Varying = _mesa_new_parameter_list();
2268 _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL);
2269 _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL);
2270
2271 if (prog->LinkStatus) {
2272 link_shaders(prog);
2273
2274       /* We don't use the linker's uniforms list; we build our own at
2275        * code-generation time instead.
2276        */
2277       if (prog->Uniforms)
             _mesa_free_uniform_list(prog->Uniforms);
2278       prog->Uniforms = _mesa_new_uniform_list();
2279 }
2280
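   /* Generate a Mesa program for each linked stage, hook up its uniforms,
    * and notify the driver.
    */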
2281 if (prog->LinkStatus) {
2282 for (i = 0; i < prog->_NumLinkedShaders; i++) {
2283 struct gl_program *linked_prog;
2284
2285 linked_prog = get_mesa_program(ctx, prog,
2286 prog->_LinkedShaders[i]);
2287 count_resources(linked_prog);
2288
2289 link_uniforms_to_shared_uniform_list(prog->Uniforms, linked_prog);
2290
2291 switch (prog->_LinkedShaders[i]->Type) {
2292 case GL_VERTEX_SHADER:
2293 _mesa_reference_vertprog(ctx, &prog->VertexProgram,
2294 (struct gl_vertex_program *)linked_prog);
2295 ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB,
2296 linked_prog);
2297 break;
2298 case GL_FRAGMENT_SHADER:
2299 _mesa_reference_fragprog(ctx, &prog->FragmentProgram,
2300 (struct gl_fragment_program *)linked_prog);
2301 ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB,
2302 linked_prog);
2303 break;
2304 }
2305 }
2306 }
2307 }
2308
2309 } /* extern "C" */