ba0934c446dd5cd5e3850e9a967fe311321cafaa
[mesa.git] / src / mesa / program / ir_to_mesa.cpp
1 /*
2 * Copyright (C) 2005-2007 Brian Paul All Rights Reserved.
3 * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
4 * Copyright © 2010 Intel Corporation
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
24 */
25
26 /**
27 * \file ir_to_mesa.cpp
28 *
29 * Translates the IR to ARB_fragment_program text if possible,
30 * printing the result
31 */
32
33 #include <stdio.h>
34 #include "ir.h"
35 #include "ir_visitor.h"
36 #include "ir_print_visitor.h"
37 #include "ir_expression_flattening.h"
38 #include "glsl_types.h"
39 #include "glsl_parser_extras.h"
40 #include "../glsl/program.h"
41 #include "ir_optimization.h"
42 #include "ast.h"
43
44 extern "C" {
45 #include "main/mtypes.h"
46 #include "main/shaderobj.h"
47 #include "main/uniforms.h"
48 #include "program/prog_instruction.h"
49 #include "program/prog_optimize.h"
50 #include "program/prog_print.h"
51 #include "program/program.h"
52 #include "program/prog_uniform.h"
53 #include "program/prog_parameter.h"
54 }
55
56 static int swizzle_for_size(int size);
57
58 /**
59 * This struct is a corresponding struct to Mesa prog_src_register, with
60 * wider fields.
61 */
62 typedef struct ir_to_mesa_src_reg {
63 ir_to_mesa_src_reg(int file, int index, const glsl_type *type)
64 {
65 this->file = file;
66 this->index = index;
67 if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
68 this->swizzle = swizzle_for_size(type->vector_elements);
69 else
70 this->swizzle = SWIZZLE_XYZW;
71 this->negate = 0;
72 this->reladdr = NULL;
73 }
74
75 ir_to_mesa_src_reg()
76 {
77 this->file = PROGRAM_UNDEFINED;
78 }
79
80 int file; /**< PROGRAM_* from Mesa */
81 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
82 GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
83 int negate; /**< NEGATE_XYZW mask from mesa */
84 /** Register index should be offset by the integer in this reg. */
85 ir_to_mesa_src_reg *reladdr;
86 } ir_to_mesa_src_reg;
87
88 typedef struct ir_to_mesa_dst_reg {
89 int file; /**< PROGRAM_* from Mesa */
90 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
91 int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
92 GLuint cond_mask:4;
93 /** Register index should be offset by the integer in this reg. */
94 ir_to_mesa_src_reg *reladdr;
95 } ir_to_mesa_dst_reg;
96
97 extern ir_to_mesa_src_reg ir_to_mesa_undef;
98
99 class ir_to_mesa_instruction : public exec_node {
100 public:
101 enum prog_opcode op;
102 ir_to_mesa_dst_reg dst_reg;
103 ir_to_mesa_src_reg src_reg[3];
104 /** Pointer to the ir source this tree came from for debugging */
105 ir_instruction *ir;
106 GLboolean cond_update;
107 int sampler; /**< sampler index */
108 int tex_target; /**< One of TEXTURE_*_INDEX */
109 GLboolean tex_shadow;
110
111 class function_entry *function; /* Set on OPCODE_CAL or OPCODE_BGNSUB */
112 };
113
114 class variable_storage : public exec_node {
115 public:
116 variable_storage(ir_variable *var, int file, int index)
117 : file(file), index(index), var(var)
118 {
119 /* empty */
120 }
121
122 int file;
123 int index;
124 ir_variable *var; /* variable that maps to this, if any */
125 };
126
127 class function_entry : public exec_node {
128 public:
129 ir_function_signature *sig;
130
131 /**
132 * identifier of this function signature used by the program.
133 *
134 * At the point that Mesa instructions for function calls are
135 * generated, we don't know the address of the first instruction of
136 * the function body. So we make the BranchTarget that is called a
137 * small integer and rewrite them during set_branchtargets().
138 */
139 int sig_id;
140
141 /**
142 * Pointer to first instruction of the function body.
143 *
144 * Set during function body emits after main() is processed.
145 */
146 ir_to_mesa_instruction *bgn_inst;
147
148 /**
149 * Index of the first instruction of the function body in actual
150 * Mesa IR.
151 *
152 * Set after convertion from ir_to_mesa_instruction to prog_instruction.
153 */
154 int inst;
155
156 /** Storage for the return value. */
157 ir_to_mesa_src_reg return_reg;
158 };
159
160 class ir_to_mesa_visitor : public ir_visitor {
161 public:
162 ir_to_mesa_visitor();
163
164 function_entry *current_function;
165
166 GLcontext *ctx;
167 struct gl_program *prog;
168
169 int next_temp;
170
171 variable_storage *find_variable_storage(ir_variable *var);
172
173 function_entry *get_function_signature(ir_function_signature *sig);
174
175 ir_to_mesa_src_reg get_temp(const glsl_type *type);
176 void reladdr_to_temp(ir_instruction *ir,
177 ir_to_mesa_src_reg *reg, int *num_reladdr);
178
179 struct ir_to_mesa_src_reg src_reg_for_float(float val);
180
181 /**
182 * \name Visit methods
183 *
184 * As typical for the visitor pattern, there must be one \c visit method for
185 * each concrete subclass of \c ir_instruction. Virtual base classes within
186 * the hierarchy should not have \c visit methods.
187 */
188 /*@{*/
189 virtual void visit(ir_variable *);
190 virtual void visit(ir_loop *);
191 virtual void visit(ir_loop_jump *);
192 virtual void visit(ir_function_signature *);
193 virtual void visit(ir_function *);
194 virtual void visit(ir_expression *);
195 virtual void visit(ir_swizzle *);
196 virtual void visit(ir_dereference_variable *);
197 virtual void visit(ir_dereference_array *);
198 virtual void visit(ir_dereference_record *);
199 virtual void visit(ir_assignment *);
200 virtual void visit(ir_constant *);
201 virtual void visit(ir_call *);
202 virtual void visit(ir_return *);
203 virtual void visit(ir_discard *);
204 virtual void visit(ir_texture *);
205 virtual void visit(ir_if *);
206 /*@}*/
207
208 struct ir_to_mesa_src_reg result;
209
210 /** List of variable_storage */
211 exec_list variables;
212
213 /** List of function_entry */
214 exec_list function_signatures;
215 int next_signature_id;
216
217 /** List of ir_to_mesa_instruction */
218 exec_list instructions;
219
220 ir_to_mesa_instruction *ir_to_mesa_emit_op0(ir_instruction *ir,
221 enum prog_opcode op);
222
223 ir_to_mesa_instruction *ir_to_mesa_emit_op1(ir_instruction *ir,
224 enum prog_opcode op,
225 ir_to_mesa_dst_reg dst,
226 ir_to_mesa_src_reg src0);
227
228 ir_to_mesa_instruction *ir_to_mesa_emit_op2(ir_instruction *ir,
229 enum prog_opcode op,
230 ir_to_mesa_dst_reg dst,
231 ir_to_mesa_src_reg src0,
232 ir_to_mesa_src_reg src1);
233
234 ir_to_mesa_instruction *ir_to_mesa_emit_op3(ir_instruction *ir,
235 enum prog_opcode op,
236 ir_to_mesa_dst_reg dst,
237 ir_to_mesa_src_reg src0,
238 ir_to_mesa_src_reg src1,
239 ir_to_mesa_src_reg src2);
240
241 void ir_to_mesa_emit_scalar_op1(ir_instruction *ir,
242 enum prog_opcode op,
243 ir_to_mesa_dst_reg dst,
244 ir_to_mesa_src_reg src0);
245
246 void ir_to_mesa_emit_scalar_op2(ir_instruction *ir,
247 enum prog_opcode op,
248 ir_to_mesa_dst_reg dst,
249 ir_to_mesa_src_reg src0,
250 ir_to_mesa_src_reg src1);
251
252 GLboolean try_emit_mad(ir_expression *ir,
253 int mul_operand);
254
255 int add_uniform(const char *name,
256 const glsl_type *type,
257 ir_constant *constant);
258 void add_aggregate_uniform(ir_instruction *ir,
259 const char *name,
260 const struct glsl_type *type,
261 ir_constant *constant,
262 struct ir_to_mesa_dst_reg temp);
263
264 int *sampler_map;
265 int sampler_map_size;
266
267 void map_sampler(int location, int sampler);
268 int get_sampler_number(int location);
269
270 void *mem_ctx;
271 };
272
273 ir_to_mesa_src_reg ir_to_mesa_undef = ir_to_mesa_src_reg(PROGRAM_UNDEFINED, 0, NULL);
274
275 ir_to_mesa_dst_reg ir_to_mesa_undef_dst = {
276 PROGRAM_UNDEFINED, 0, SWIZZLE_NOOP, COND_TR, NULL,
277 };
278
279 ir_to_mesa_dst_reg ir_to_mesa_address_reg = {
280 PROGRAM_ADDRESS, 0, WRITEMASK_X, COND_TR, NULL
281 };
282
283 static int swizzle_for_size(int size)
284 {
285 int size_swizzles[4] = {
286 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
287 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
288 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
289 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
290 };
291
292 return size_swizzles[size - 1];
293 }
294
295 ir_to_mesa_instruction *
296 ir_to_mesa_visitor::ir_to_mesa_emit_op3(ir_instruction *ir,
297 enum prog_opcode op,
298 ir_to_mesa_dst_reg dst,
299 ir_to_mesa_src_reg src0,
300 ir_to_mesa_src_reg src1,
301 ir_to_mesa_src_reg src2)
302 {
303 ir_to_mesa_instruction *inst = new(mem_ctx) ir_to_mesa_instruction();
304 int num_reladdr = 0;
305
306 /* If we have to do relative addressing, we want to load the ARL
307 * reg directly for one of the regs, and preload the other reladdr
308 * sources into temps.
309 */
310 num_reladdr += dst.reladdr != NULL;
311 num_reladdr += src0.reladdr != NULL;
312 num_reladdr += src1.reladdr != NULL;
313 num_reladdr += src2.reladdr != NULL;
314
315 reladdr_to_temp(ir, &src2, &num_reladdr);
316 reladdr_to_temp(ir, &src1, &num_reladdr);
317 reladdr_to_temp(ir, &src0, &num_reladdr);
318
319 if (dst.reladdr) {
320 ir_to_mesa_emit_op1(ir, OPCODE_ARL, ir_to_mesa_address_reg,
321 *dst.reladdr);
322
323 num_reladdr--;
324 }
325 assert(num_reladdr == 0);
326
327 inst->op = op;
328 inst->dst_reg = dst;
329 inst->src_reg[0] = src0;
330 inst->src_reg[1] = src1;
331 inst->src_reg[2] = src2;
332 inst->ir = ir;
333
334 inst->function = NULL;
335
336 this->instructions.push_tail(inst);
337
338 return inst;
339 }
340
341
342 ir_to_mesa_instruction *
343 ir_to_mesa_visitor::ir_to_mesa_emit_op2(ir_instruction *ir,
344 enum prog_opcode op,
345 ir_to_mesa_dst_reg dst,
346 ir_to_mesa_src_reg src0,
347 ir_to_mesa_src_reg src1)
348 {
349 return ir_to_mesa_emit_op3(ir, op, dst, src0, src1, ir_to_mesa_undef);
350 }
351
352 ir_to_mesa_instruction *
353 ir_to_mesa_visitor::ir_to_mesa_emit_op1(ir_instruction *ir,
354 enum prog_opcode op,
355 ir_to_mesa_dst_reg dst,
356 ir_to_mesa_src_reg src0)
357 {
358 return ir_to_mesa_emit_op3(ir, op, dst,
359 src0, ir_to_mesa_undef, ir_to_mesa_undef);
360 }
361
362 ir_to_mesa_instruction *
363 ir_to_mesa_visitor::ir_to_mesa_emit_op0(ir_instruction *ir,
364 enum prog_opcode op)
365 {
366 return ir_to_mesa_emit_op3(ir, op, ir_to_mesa_undef_dst,
367 ir_to_mesa_undef,
368 ir_to_mesa_undef,
369 ir_to_mesa_undef);
370 }
371
372 void
373 ir_to_mesa_visitor::map_sampler(int location, int sampler)
374 {
375 if (this->sampler_map_size <= location) {
376 this->sampler_map = talloc_realloc(this->mem_ctx, this->sampler_map,
377 int, location + 1);
378 this->sampler_map_size = location + 1;
379 }
380
381 this->sampler_map[location] = sampler;
382 }
383
384 int
385 ir_to_mesa_visitor::get_sampler_number(int location)
386 {
387 assert(location < this->sampler_map_size);
388 return this->sampler_map[location];
389 }
390
391 inline ir_to_mesa_dst_reg
392 ir_to_mesa_dst_reg_from_src(ir_to_mesa_src_reg reg)
393 {
394 ir_to_mesa_dst_reg dst_reg;
395
396 dst_reg.file = reg.file;
397 dst_reg.index = reg.index;
398 dst_reg.writemask = WRITEMASK_XYZW;
399 dst_reg.cond_mask = COND_TR;
400 dst_reg.reladdr = reg.reladdr;
401
402 return dst_reg;
403 }
404
405 inline ir_to_mesa_src_reg
406 ir_to_mesa_src_reg_from_dst(ir_to_mesa_dst_reg reg)
407 {
408 return ir_to_mesa_src_reg(reg.file, reg.index, NULL);
409 }
410
411 /**
412 * Emits Mesa scalar opcodes to produce unique answers across channels.
413 *
414 * Some Mesa opcodes are scalar-only, like ARB_fp/vp. The src X
415 * channel determines the result across all channels. So to do a vec4
416 * of this operation, we want to emit a scalar per source channel used
417 * to produce dest channels.
418 */
419 void
420 ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op2(ir_instruction *ir,
421 enum prog_opcode op,
422 ir_to_mesa_dst_reg dst,
423 ir_to_mesa_src_reg orig_src0,
424 ir_to_mesa_src_reg orig_src1)
425 {
426 int i, j;
427 int done_mask = ~dst.writemask;
428
429 /* Mesa RCP is a scalar operation splatting results to all channels,
430 * like ARB_fp/vp. So emit as many RCPs as necessary to cover our
431 * dst channels.
432 */
433 for (i = 0; i < 4; i++) {
434 GLuint this_mask = (1 << i);
435 ir_to_mesa_instruction *inst;
436 ir_to_mesa_src_reg src0 = orig_src0;
437 ir_to_mesa_src_reg src1 = orig_src1;
438
439 if (done_mask & this_mask)
440 continue;
441
442 GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
443 GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
444 for (j = i + 1; j < 4; j++) {
445 if (!(done_mask & (1 << j)) &&
446 GET_SWZ(src0.swizzle, j) == src0_swiz &&
447 GET_SWZ(src1.swizzle, j) == src1_swiz) {
448 this_mask |= (1 << j);
449 }
450 }
451 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
452 src0_swiz, src0_swiz);
453 src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
454 src1_swiz, src1_swiz);
455
456 inst = ir_to_mesa_emit_op2(ir, op,
457 dst,
458 src0,
459 src1);
460 inst->dst_reg.writemask = this_mask;
461 done_mask |= this_mask;
462 }
463 }
464
465 void
466 ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op1(ir_instruction *ir,
467 enum prog_opcode op,
468 ir_to_mesa_dst_reg dst,
469 ir_to_mesa_src_reg src0)
470 {
471 ir_to_mesa_src_reg undef = ir_to_mesa_undef;
472
473 undef.swizzle = SWIZZLE_XXXX;
474
475 ir_to_mesa_emit_scalar_op2(ir, op, dst, src0, undef);
476 }
477
478 struct ir_to_mesa_src_reg
479 ir_to_mesa_visitor::src_reg_for_float(float val)
480 {
481 ir_to_mesa_src_reg src_reg(PROGRAM_CONSTANT, -1, NULL);
482
483 src_reg.index = _mesa_add_unnamed_constant(this->prog->Parameters,
484 &val, 1, &src_reg.swizzle);
485
486 return src_reg;
487 }
488
489 static int
490 type_size(const struct glsl_type *type)
491 {
492 unsigned int i;
493 int size;
494
495 switch (type->base_type) {
496 case GLSL_TYPE_UINT:
497 case GLSL_TYPE_INT:
498 case GLSL_TYPE_FLOAT:
499 case GLSL_TYPE_BOOL:
500 if (type->is_matrix()) {
501 return type->matrix_columns;
502 } else {
503 /* Regardless of size of vector, it gets a vec4. This is bad
504 * packing for things like floats, but otherwise arrays become a
505 * mess. Hopefully a later pass over the code can pack scalars
506 * down if appropriate.
507 */
508 return 1;
509 }
510 case GLSL_TYPE_ARRAY:
511 return type_size(type->fields.array) * type->length;
512 case GLSL_TYPE_STRUCT:
513 size = 0;
514 for (i = 0; i < type->length; i++) {
515 size += type_size(type->fields.structure[i].type);
516 }
517 return size;
518 default:
519 assert(0);
520 }
521 }
522
523 /**
524 * In the initial pass of codegen, we assign temporary numbers to
525 * intermediate results. (not SSA -- variable assignments will reuse
526 * storage). Actual register allocation for the Mesa VM occurs in a
527 * pass over the Mesa IR later.
528 */
529 ir_to_mesa_src_reg
530 ir_to_mesa_visitor::get_temp(const glsl_type *type)
531 {
532 ir_to_mesa_src_reg src_reg;
533 int swizzle[4];
534 int i;
535
536 src_reg.file = PROGRAM_TEMPORARY;
537 src_reg.index = next_temp;
538 src_reg.reladdr = NULL;
539 next_temp += type_size(type);
540
541 if (type->is_array() || type->is_record()) {
542 src_reg.swizzle = SWIZZLE_NOOP;
543 } else {
544 for (i = 0; i < type->vector_elements; i++)
545 swizzle[i] = i;
546 for (; i < 4; i++)
547 swizzle[i] = type->vector_elements - 1;
548 src_reg.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1],
549 swizzle[2], swizzle[3]);
550 }
551 src_reg.negate = 0;
552
553 return src_reg;
554 }
555
556 variable_storage *
557 ir_to_mesa_visitor::find_variable_storage(ir_variable *var)
558 {
559
560 variable_storage *entry;
561
562 foreach_iter(exec_list_iterator, iter, this->variables) {
563 entry = (variable_storage *)iter.get();
564
565 if (entry->var == var)
566 return entry;
567 }
568
569 return NULL;
570 }
571
572 void
573 ir_to_mesa_visitor::visit(ir_variable *ir)
574 {
575 if (strcmp(ir->name, "gl_FragCoord") == 0) {
576 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
577
578 fp->OriginUpperLeft = ir->origin_upper_left;
579 fp->PixelCenterInteger = ir->pixel_center_integer;
580 }
581 }
582
583 void
584 ir_to_mesa_visitor::visit(ir_loop *ir)
585 {
586 assert(!ir->from);
587 assert(!ir->to);
588 assert(!ir->increment);
589 assert(!ir->counter);
590
591 ir_to_mesa_emit_op0(NULL, OPCODE_BGNLOOP);
592 visit_exec_list(&ir->body_instructions, this);
593 ir_to_mesa_emit_op0(NULL, OPCODE_ENDLOOP);
594 }
595
596 void
597 ir_to_mesa_visitor::visit(ir_loop_jump *ir)
598 {
599 switch (ir->mode) {
600 case ir_loop_jump::jump_break:
601 ir_to_mesa_emit_op0(NULL, OPCODE_BRK);
602 break;
603 case ir_loop_jump::jump_continue:
604 ir_to_mesa_emit_op0(NULL, OPCODE_CONT);
605 break;
606 }
607 }
608
609
610 void
611 ir_to_mesa_visitor::visit(ir_function_signature *ir)
612 {
613 assert(0);
614 (void)ir;
615 }
616
617 void
618 ir_to_mesa_visitor::visit(ir_function *ir)
619 {
620 /* Ignore function bodies other than main() -- we shouldn't see calls to
621 * them since they should all be inlined before we get to ir_to_mesa.
622 */
623 if (strcmp(ir->name, "main") == 0) {
624 const ir_function_signature *sig;
625 exec_list empty;
626
627 sig = ir->matching_signature(&empty);
628
629 assert(sig);
630
631 foreach_iter(exec_list_iterator, iter, sig->body) {
632 ir_instruction *ir = (ir_instruction *)iter.get();
633
634 ir->accept(this);
635 }
636 }
637 }
638
639 GLboolean
640 ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
641 {
642 int nonmul_operand = 1 - mul_operand;
643 ir_to_mesa_src_reg a, b, c;
644
645 ir_expression *expr = ir->operands[mul_operand]->as_expression();
646 if (!expr || expr->operation != ir_binop_mul)
647 return false;
648
649 expr->operands[0]->accept(this);
650 a = this->result;
651 expr->operands[1]->accept(this);
652 b = this->result;
653 ir->operands[nonmul_operand]->accept(this);
654 c = this->result;
655
656 this->result = get_temp(ir->type);
657 ir_to_mesa_emit_op3(ir, OPCODE_MAD,
658 ir_to_mesa_dst_reg_from_src(this->result), a, b, c);
659
660 return true;
661 }
662
663 void
664 ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir,
665 ir_to_mesa_src_reg *reg, int *num_reladdr)
666 {
667 if (!reg->reladdr)
668 return;
669
670 ir_to_mesa_emit_op1(ir, OPCODE_ARL, ir_to_mesa_address_reg, *reg->reladdr);
671
672 if (*num_reladdr != 1) {
673 ir_to_mesa_src_reg temp = get_temp(glsl_type::vec4_type);
674
675 ir_to_mesa_emit_op1(ir, OPCODE_MOV,
676 ir_to_mesa_dst_reg_from_src(temp), *reg);
677 *reg = temp;
678 }
679
680 (*num_reladdr)--;
681 }
682
683 void
684 ir_to_mesa_visitor::visit(ir_expression *ir)
685 {
686 unsigned int operand;
687 struct ir_to_mesa_src_reg op[2];
688 struct ir_to_mesa_src_reg result_src;
689 struct ir_to_mesa_dst_reg result_dst;
690 const glsl_type *vec4_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 4, 1);
691 const glsl_type *vec3_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 3, 1);
692 const glsl_type *vec2_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 2, 1);
693
694 /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c)
695 */
696 if (ir->operation == ir_binop_add) {
697 if (try_emit_mad(ir, 1))
698 return;
699 if (try_emit_mad(ir, 0))
700 return;
701 }
702
703 for (operand = 0; operand < ir->get_num_operands(); operand++) {
704 this->result.file = PROGRAM_UNDEFINED;
705 ir->operands[operand]->accept(this);
706 if (this->result.file == PROGRAM_UNDEFINED) {
707 ir_print_visitor v;
708 printf("Failed to get tree for expression operand:\n");
709 ir->operands[operand]->accept(&v);
710 exit(1);
711 }
712 op[operand] = this->result;
713
714 /* Matrix expression operands should have been broken down to vector
715 * operations already.
716 */
717 assert(!ir->operands[operand]->type->is_matrix());
718 }
719
720 this->result.file = PROGRAM_UNDEFINED;
721
722 /* Storage for our result. Ideally for an assignment we'd be using
723 * the actual storage for the result here, instead.
724 */
725 result_src = get_temp(ir->type);
726 /* convenience for the emit functions below. */
727 result_dst = ir_to_mesa_dst_reg_from_src(result_src);
728 /* Limit writes to the channels that will be used by result_src later.
729 * This does limit this temp's use as a temporary for multi-instruction
730 * sequences.
731 */
732 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
733
734 switch (ir->operation) {
735 case ir_unop_logic_not:
736 ir_to_mesa_emit_op2(ir, OPCODE_SEQ, result_dst,
737 op[0], src_reg_for_float(0.0));
738 break;
739 case ir_unop_neg:
740 op[0].negate = ~op[0].negate;
741 result_src = op[0];
742 break;
743 case ir_unop_abs:
744 ir_to_mesa_emit_op1(ir, OPCODE_ABS, result_dst, op[0]);
745 break;
746 case ir_unop_sign:
747 ir_to_mesa_emit_op1(ir, OPCODE_SSG, result_dst, op[0]);
748 break;
749 case ir_unop_rcp:
750 ir_to_mesa_emit_scalar_op1(ir, OPCODE_RCP, result_dst, op[0]);
751 break;
752
753 case ir_unop_exp:
754 ir_to_mesa_emit_scalar_op2(ir, OPCODE_POW, result_dst,
755 src_reg_for_float(M_E), op[0]);
756 break;
757 case ir_unop_exp2:
758 ir_to_mesa_emit_scalar_op1(ir, OPCODE_EX2, result_dst, op[0]);
759 break;
760 case ir_unop_log:
761 ir_to_mesa_emit_scalar_op1(ir, OPCODE_LOG, result_dst, op[0]);
762 break;
763 case ir_unop_log2:
764 ir_to_mesa_emit_scalar_op1(ir, OPCODE_LG2, result_dst, op[0]);
765 break;
766 case ir_unop_sin:
767 ir_to_mesa_emit_scalar_op1(ir, OPCODE_SIN, result_dst, op[0]);
768 break;
769 case ir_unop_cos:
770 ir_to_mesa_emit_scalar_op1(ir, OPCODE_COS, result_dst, op[0]);
771 break;
772
773 case ir_unop_dFdx:
774 ir_to_mesa_emit_op1(ir, OPCODE_DDX, result_dst, op[0]);
775 break;
776 case ir_unop_dFdy:
777 ir_to_mesa_emit_op1(ir, OPCODE_DDY, result_dst, op[0]);
778 break;
779
780 case ir_binop_add:
781 ir_to_mesa_emit_op2(ir, OPCODE_ADD, result_dst, op[0], op[1]);
782 break;
783 case ir_binop_sub:
784 ir_to_mesa_emit_op2(ir, OPCODE_SUB, result_dst, op[0], op[1]);
785 break;
786
787 case ir_binop_mul:
788 ir_to_mesa_emit_op2(ir, OPCODE_MUL, result_dst, op[0], op[1]);
789 break;
790 case ir_binop_div:
791 assert(!"not reached: should be handled by ir_div_to_mul_rcp");
792 case ir_binop_mod:
793 assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
794 break;
795
796 case ir_binop_less:
797 ir_to_mesa_emit_op2(ir, OPCODE_SLT, result_dst, op[0], op[1]);
798 break;
799 case ir_binop_greater:
800 ir_to_mesa_emit_op2(ir, OPCODE_SGT, result_dst, op[0], op[1]);
801 break;
802 case ir_binop_lequal:
803 ir_to_mesa_emit_op2(ir, OPCODE_SLE, result_dst, op[0], op[1]);
804 break;
805 case ir_binop_gequal:
806 ir_to_mesa_emit_op2(ir, OPCODE_SGE, result_dst, op[0], op[1]);
807 break;
808 case ir_binop_equal:
809 ir_to_mesa_emit_op2(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
810 break;
811 case ir_binop_logic_xor:
812 case ir_binop_nequal:
813 ir_to_mesa_emit_op2(ir, OPCODE_SNE, result_dst, op[0], op[1]);
814 break;
815
816 case ir_binop_logic_or:
817 /* This could be a saturated add and skip the SNE. */
818 ir_to_mesa_emit_op2(ir, OPCODE_ADD,
819 result_dst,
820 op[0], op[1]);
821
822 ir_to_mesa_emit_op2(ir, OPCODE_SNE,
823 result_dst,
824 result_src, src_reg_for_float(0.0));
825 break;
826
827 case ir_binop_logic_and:
828 /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
829 ir_to_mesa_emit_op2(ir, OPCODE_MUL,
830 result_dst,
831 op[0], op[1]);
832 break;
833
834 case ir_binop_dot:
835 if (ir->operands[0]->type == vec4_type) {
836 assert(ir->operands[1]->type == vec4_type);
837 ir_to_mesa_emit_op2(ir, OPCODE_DP4,
838 result_dst,
839 op[0], op[1]);
840 } else if (ir->operands[0]->type == vec3_type) {
841 assert(ir->operands[1]->type == vec3_type);
842 ir_to_mesa_emit_op2(ir, OPCODE_DP3,
843 result_dst,
844 op[0], op[1]);
845 } else if (ir->operands[0]->type == vec2_type) {
846 assert(ir->operands[1]->type == vec2_type);
847 ir_to_mesa_emit_op2(ir, OPCODE_DP2,
848 result_dst,
849 op[0], op[1]);
850 }
851 break;
852
853 case ir_binop_cross:
854 ir_to_mesa_emit_op2(ir, OPCODE_XPD, result_dst, op[0], op[1]);
855 break;
856
857 case ir_unop_sqrt:
858 ir_to_mesa_emit_scalar_op1(ir, OPCODE_RSQ, result_dst, op[0]);
859 ir_to_mesa_emit_scalar_op1(ir, OPCODE_RCP, result_dst, result_src);
860 /* For incoming channels < 0, set the result to 0. */
861 ir_to_mesa_emit_op3(ir, OPCODE_CMP, result_dst,
862 op[0], src_reg_for_float(0.0), result_src);
863 break;
864 case ir_unop_rsq:
865 ir_to_mesa_emit_scalar_op1(ir, OPCODE_RSQ, result_dst, op[0]);
866 break;
867 case ir_unop_i2f:
868 case ir_unop_b2f:
869 case ir_unop_b2i:
870 /* Mesa IR lacks types, ints are stored as truncated floats. */
871 result_src = op[0];
872 break;
873 case ir_unop_f2i:
874 ir_to_mesa_emit_op1(ir, OPCODE_TRUNC, result_dst, op[0]);
875 break;
876 case ir_unop_f2b:
877 case ir_unop_i2b:
878 ir_to_mesa_emit_op2(ir, OPCODE_SNE, result_dst,
879 result_src, src_reg_for_float(0.0));
880 break;
881 case ir_unop_trunc:
882 ir_to_mesa_emit_op1(ir, OPCODE_TRUNC, result_dst, op[0]);
883 break;
884 case ir_unop_ceil:
885 op[0].negate = ~op[0].negate;
886 ir_to_mesa_emit_op1(ir, OPCODE_FLR, result_dst, op[0]);
887 result_src.negate = ~result_src.negate;
888 break;
889 case ir_unop_floor:
890 ir_to_mesa_emit_op1(ir, OPCODE_FLR, result_dst, op[0]);
891 break;
892 case ir_unop_fract:
893 ir_to_mesa_emit_op1(ir, OPCODE_FRC, result_dst, op[0]);
894 break;
895
896 case ir_binop_min:
897 ir_to_mesa_emit_op2(ir, OPCODE_MIN, result_dst, op[0], op[1]);
898 break;
899 case ir_binop_max:
900 ir_to_mesa_emit_op2(ir, OPCODE_MAX, result_dst, op[0], op[1]);
901 break;
902 case ir_binop_pow:
903 ir_to_mesa_emit_scalar_op2(ir, OPCODE_POW, result_dst, op[0], op[1]);
904 break;
905
906 case ir_unop_bit_not:
907 case ir_unop_u2f:
908 case ir_binop_lshift:
909 case ir_binop_rshift:
910 case ir_binop_bit_and:
911 case ir_binop_bit_xor:
912 case ir_binop_bit_or:
913 assert(!"GLSL 1.30 features unsupported");
914 break;
915 }
916
917 this->result = result_src;
918 }
919
920
921 void
922 ir_to_mesa_visitor::visit(ir_swizzle *ir)
923 {
924 ir_to_mesa_src_reg src_reg;
925 int i;
926 int swizzle[4];
927
928 /* Note that this is only swizzles in expressions, not those on the left
929 * hand side of an assignment, which do write masking. See ir_assignment
930 * for that.
931 */
932
933 ir->val->accept(this);
934 src_reg = this->result;
935 assert(src_reg.file != PROGRAM_UNDEFINED);
936
937 for (i = 0; i < 4; i++) {
938 if (i < ir->type->vector_elements) {
939 switch (i) {
940 case 0:
941 swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.x);
942 break;
943 case 1:
944 swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.y);
945 break;
946 case 2:
947 swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.z);
948 break;
949 case 3:
950 swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.w);
951 break;
952 }
953 } else {
954 /* If the type is smaller than a vec4, replicate the last
955 * channel out.
956 */
957 swizzle[i] = swizzle[ir->type->vector_elements - 1];
958 }
959 }
960
961 src_reg.swizzle = MAKE_SWIZZLE4(swizzle[0],
962 swizzle[1],
963 swizzle[2],
964 swizzle[3]);
965
966 this->result = src_reg;
967 }
968
969 static const struct {
970 const char *name;
971 const char *field;
972 int tokens[STATE_LENGTH];
973 int swizzle;
974 bool array_indexed;
975 } statevars[] = {
976 {"gl_DepthRange", "near",
977 {STATE_DEPTH_RANGE, 0, 0}, SWIZZLE_XXXX},
978 {"gl_DepthRange", "far",
979 {STATE_DEPTH_RANGE, 0, 0}, SWIZZLE_YYYY},
980 {"gl_DepthRange", "diff",
981 {STATE_DEPTH_RANGE, 0, 0}, SWIZZLE_ZZZZ},
982
983 {"gl_ClipPlane", NULL,
984 {STATE_CLIPPLANE, 0, 0}, SWIZZLE_XYZW, true}
985 ,
986 {"gl_Point", "size",
987 {STATE_POINT_SIZE}, SWIZZLE_XXXX},
988 {"gl_Point", "sizeMin",
989 {STATE_POINT_SIZE}, SWIZZLE_YYYY},
990 {"gl_Point", "sizeMax",
991 {STATE_POINT_SIZE}, SWIZZLE_ZZZZ},
992 {"gl_Point", "fadeThresholdSize",
993 {STATE_POINT_SIZE}, SWIZZLE_WWWW},
994 {"gl_Point", "distanceConstantAttenuation",
995 {STATE_POINT_ATTENUATION}, SWIZZLE_XXXX},
996 {"gl_Point", "distanceLinearAttenuation",
997 {STATE_POINT_ATTENUATION}, SWIZZLE_YYYY},
998 {"gl_Point", "distanceQuadraticAttenuation",
999 {STATE_POINT_ATTENUATION}, SWIZZLE_ZZZZ},
1000
1001 {"gl_FrontMaterial", "emission",
1002 {STATE_MATERIAL, 0, STATE_EMISSION}, SWIZZLE_XYZW},
1003 {"gl_FrontMaterial", "ambient",
1004 {STATE_MATERIAL, 0, STATE_AMBIENT}, SWIZZLE_XYZW},
1005 {"gl_FrontMaterial", "diffuse",
1006 {STATE_MATERIAL, 0, STATE_DIFFUSE}, SWIZZLE_XYZW},
1007 {"gl_FrontMaterial", "specular",
1008 {STATE_MATERIAL, 0, STATE_SPECULAR}, SWIZZLE_XYZW},
1009 {"gl_FrontMaterial", "shininess",
1010 {STATE_MATERIAL, 0, STATE_SHININESS}, SWIZZLE_XXXX},
1011
1012 {"gl_BackMaterial", "emission",
1013 {STATE_MATERIAL, 1, STATE_EMISSION}, SWIZZLE_XYZW},
1014 {"gl_BackMaterial", "ambient",
1015 {STATE_MATERIAL, 1, STATE_AMBIENT}, SWIZZLE_XYZW},
1016 {"gl_BackMaterial", "diffuse",
1017 {STATE_MATERIAL, 1, STATE_DIFFUSE}, SWIZZLE_XYZW},
1018 {"gl_BackMaterial", "specular",
1019 {STATE_MATERIAL, 1, STATE_SPECULAR}, SWIZZLE_XYZW},
1020 {"gl_BackMaterial", "shininess",
1021 {STATE_MATERIAL, 1, STATE_SHININESS}, SWIZZLE_XXXX},
1022
1023 {"gl_LightSource", "ambient",
1024 {STATE_LIGHT, 0, STATE_AMBIENT}, SWIZZLE_XYZW, true},
1025 {"gl_LightSource", "diffuse",
1026 {STATE_LIGHT, 0, STATE_DIFFUSE}, SWIZZLE_XYZW, true},
1027 {"gl_LightSource", "specular",
1028 {STATE_LIGHT, 0, STATE_SPECULAR}, SWIZZLE_XYZW, true},
1029 {"gl_LightSource", "position",
1030 {STATE_LIGHT, 0, STATE_POSITION}, SWIZZLE_XYZW, true},
1031 {"gl_LightSource", "halfVector",
1032 {STATE_LIGHT, 0, STATE_HALF_VECTOR}, SWIZZLE_XYZW, true},
1033 {"gl_LightSource", "spotDirection",
1034 {STATE_LIGHT, 0, STATE_SPOT_DIRECTION}, SWIZZLE_XYZW, true},
1035 {"gl_LightSource", "spotCosCutoff",
1036 {STATE_LIGHT, 0, STATE_SPOT_DIRECTION}, SWIZZLE_WWWW, true},
1037 {"gl_LightSource", "spotCutoff",
1038 {STATE_LIGHT, 0, STATE_SPOT_CUTOFF}, SWIZZLE_XXXX, true},
1039 {"gl_LightSource", "spotExponent",
1040 {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_WWWW, true},
1041 {"gl_LightSource", "constantAttenuation",
1042 {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_XXXX, true},
1043 {"gl_LightSource", "linearAttenuation",
1044 {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_YYYY, true},
1045 {"gl_LightSource", "quadraticAttenuation",
1046 {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_ZZZZ, true},
1047
1048 {"gl_LightModel", NULL,
1049 {STATE_LIGHTMODEL_AMBIENT, 0}, SWIZZLE_XYZW},
1050
1051 {"gl_FrontLightModelProduct", NULL,
1052 {STATE_LIGHTMODEL_SCENECOLOR, 0}, SWIZZLE_XYZW},
1053 {"gl_BackLightModelProduct", NULL,
1054 {STATE_LIGHTMODEL_SCENECOLOR, 1}, SWIZZLE_XYZW},
1055
1056 {"gl_FrontLightProduct", "ambient",
1057 {STATE_LIGHTPROD, 0, 0, STATE_AMBIENT}, SWIZZLE_XYZW, true},
1058 {"gl_FrontLightProduct", "diffuse",
1059 {STATE_LIGHTPROD, 0, 0, STATE_DIFFUSE}, SWIZZLE_XYZW, true},
1060 {"gl_FrontLightProduct", "specular",
1061 {STATE_LIGHTPROD, 0, 0, STATE_SPECULAR}, SWIZZLE_XYZW, true},
1062
1063 {"gl_BackLightProduct", "ambient",
1064 {STATE_LIGHTPROD, 0, 1, STATE_AMBIENT}, SWIZZLE_XYZW, true},
1065 {"gl_BackLightProduct", "diffuse",
1066 {STATE_LIGHTPROD, 0, 1, STATE_DIFFUSE}, SWIZZLE_XYZW, true},
1067 {"gl_BackLightProduct", "specular",
1068 {STATE_LIGHTPROD, 0, 1, STATE_SPECULAR}, SWIZZLE_XYZW, true},
1069
1070 {"gl_TextureEnvColor", "ambient",
1071 {STATE_TEXENV_COLOR, 0}, SWIZZLE_XYZW, true},
1072
1073 {"gl_EyePlaneS", NULL,
1074 {STATE_TEXGEN, 0, STATE_TEXGEN_EYE_S}, SWIZZLE_XYZW, true},
1075 {"gl_EyePlaneT", NULL,
1076 {STATE_TEXGEN, 0, STATE_TEXGEN_EYE_T}, SWIZZLE_XYZW, true},
1077 {"gl_EyePlaneR", NULL,
1078 {STATE_TEXGEN, 0, STATE_TEXGEN_EYE_R}, SWIZZLE_XYZW, true},
1079 {"gl_EyePlaneQ", NULL,
1080 {STATE_TEXGEN, 0, STATE_TEXGEN_EYE_Q}, SWIZZLE_XYZW, true},
1081
1082 {"gl_ObjectPlaneS", NULL,
1083 {STATE_TEXGEN, 0, STATE_TEXGEN_OBJECT_S}, SWIZZLE_XYZW, true},
1084 {"gl_ObjectPlaneT", NULL,
1085 {STATE_TEXGEN, 0, STATE_TEXGEN_OBJECT_T}, SWIZZLE_XYZW, true},
1086 {"gl_ObjectPlaneR", NULL,
1087 {STATE_TEXGEN, 0, STATE_TEXGEN_OBJECT_R}, SWIZZLE_XYZW, true},
1088 {"gl_ObjectPlaneQ", NULL,
1089 {STATE_TEXGEN, 0, STATE_TEXGEN_OBJECT_Q}, SWIZZLE_XYZW, true},
1090
1091 {"gl_Fog", "color",
1092 {STATE_FOG_COLOR}, SWIZZLE_XYZW},
1093 {"gl_Fog", "density",
1094 {STATE_FOG_PARAMS}, SWIZZLE_XXXX},
1095 {"gl_Fog", "start",
1096 {STATE_FOG_PARAMS}, SWIZZLE_YYYY},
1097 {"gl_Fog", "end",
1098 {STATE_FOG_PARAMS}, SWIZZLE_ZZZZ},
1099 {"gl_Fog", "scale",
1100 {STATE_FOG_PARAMS}, SWIZZLE_WWWW},
1101 };
1102
1103 static ir_to_mesa_src_reg
1104 get_builtin_uniform_reg(struct gl_program *prog,
1105 const char *name, int array_index, const char *field)
1106 {
1107 unsigned int i;
1108 ir_to_mesa_src_reg src_reg;
1109 int tokens[STATE_LENGTH];
1110
1111 for (i = 0; i < Elements(statevars); i++) {
1112 if (strcmp(statevars[i].name, name) != 0)
1113 continue;
1114 if (!field && statevars[i].field) {
1115 assert(!"FINISHME: whole-structure state var dereference");
1116 }
1117 if (field && strcmp(statevars[i].field, field) != 0)
1118 continue;
1119 break;
1120 }
1121
1122 if (i == Elements(statevars)) {
1123 printf("builtin uniform %s%s%s not found\n",
1124 name,
1125 field ? "." : "",
1126 field ? field : "");
1127 abort();
1128 }
1129
1130 memcpy(&tokens, statevars[i].tokens, sizeof(tokens));
1131 if (statevars[i].array_indexed)
1132 tokens[1] = array_index;
1133
1134 src_reg.file = PROGRAM_STATE_VAR;
1135 src_reg.index = _mesa_add_state_reference(prog->Parameters,
1136 (gl_state_index *)tokens);
1137 src_reg.swizzle = statevars[i].swizzle;
1138 src_reg.negate = 0;
1139 src_reg.reladdr = false;
1140
1141 return src_reg;
1142 }
1143
1144 static int
1145 add_matrix_ref(struct gl_program *prog, int *tokens)
1146 {
1147 int base_pos = -1;
1148 int i;
1149
1150 /* Add a ref for each column. It looks like the reason we do
1151 * it this way is that _mesa_add_state_reference doesn't work
1152 * for things that aren't vec4s, so the tokens[2]/tokens[3]
1153 * range has to be equal.
1154 */
1155 for (i = 0; i < 4; i++) {
1156 tokens[2] = i;
1157 tokens[3] = i;
1158 int pos = _mesa_add_state_reference(prog->Parameters,
1159 (gl_state_index *)tokens);
1160 if (base_pos == -1)
1161 base_pos = pos;
1162 else
1163 assert(base_pos + i == pos);
1164 }
1165
1166 return base_pos;
1167 }
1168
1169 static variable_storage *
1170 get_builtin_matrix_ref(void *mem_ctx, struct gl_program *prog, ir_variable *var,
1171 ir_rvalue *array_index)
1172 {
1173 /*
1174 * NOTE: The ARB_vertex_program extension specified that matrices get
1175 * loaded in registers in row-major order. With GLSL, we want column-
1176 * major order. So, we need to transpose all matrices here...
1177 */
1178 static const struct {
1179 const char *name;
1180 int matrix;
1181 int modifier;
1182 } matrices[] = {
1183 { "gl_ModelViewMatrix", STATE_MODELVIEW_MATRIX, STATE_MATRIX_TRANSPOSE },
1184 { "gl_ModelViewMatrixInverse", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVTRANS },
1185 { "gl_ModelViewMatrixTranspose", STATE_MODELVIEW_MATRIX, 0 },
1186 { "gl_ModelViewMatrixInverseTranspose", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVERSE },
1187
1188 { "gl_ProjectionMatrix", STATE_PROJECTION_MATRIX, STATE_MATRIX_TRANSPOSE },
1189 { "gl_ProjectionMatrixInverse", STATE_PROJECTION_MATRIX, STATE_MATRIX_INVTRANS },
1190 { "gl_ProjectionMatrixTranspose", STATE_PROJECTION_MATRIX, 0 },
1191 { "gl_ProjectionMatrixInverseTranspose", STATE_PROJECTION_MATRIX, STATE_MATRIX_INVERSE },
1192
1193 { "gl_ModelViewProjectionMatrix", STATE_MVP_MATRIX, STATE_MATRIX_TRANSPOSE },
1194 { "gl_ModelViewProjectionMatrixInverse", STATE_MVP_MATRIX, STATE_MATRIX_INVTRANS },
1195 { "gl_ModelViewProjectionMatrixTranspose", STATE_MVP_MATRIX, 0 },
1196 { "gl_ModelViewProjectionMatrixInverseTranspose", STATE_MVP_MATRIX, STATE_MATRIX_INVERSE },
1197
1198 { "gl_TextureMatrix", STATE_TEXTURE_MATRIX, STATE_MATRIX_TRANSPOSE },
1199 { "gl_TextureMatrixInverse", STATE_TEXTURE_MATRIX, STATE_MATRIX_INVTRANS },
1200 { "gl_TextureMatrixTranspose", STATE_TEXTURE_MATRIX, 0 },
1201 { "gl_TextureMatrixInverseTranspose", STATE_TEXTURE_MATRIX, STATE_MATRIX_INVERSE },
1202
1203 { "gl_NormalMatrix", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVERSE },
1204
1205 };
1206 unsigned int i;
1207 variable_storage *entry;
1208
1209 /* C++ gets angry when we try to use an int as a gl_state_index, so we use
1210 * ints for gl_state_index. Make sure they're compatible.
1211 */
1212 assert(sizeof(gl_state_index) == sizeof(int));
1213
1214 for (i = 0; i < Elements(matrices); i++) {
1215 if (strcmp(var->name, matrices[i].name) == 0) {
1216 int tokens[STATE_LENGTH];
1217 int base_pos = -1;
1218
1219 tokens[0] = matrices[i].matrix;
1220 tokens[4] = matrices[i].modifier;
1221 if (matrices[i].matrix == STATE_TEXTURE_MATRIX) {
1222 ir_constant *index = array_index->constant_expression_value();
1223 if (index) {
1224 tokens[1] = index->value.i[0];
1225 base_pos = add_matrix_ref(prog, tokens);
1226 } else {
1227 for (i = 0; i < var->type->length; i++) {
1228 tokens[1] = i;
1229 int pos = add_matrix_ref(prog, tokens);
1230 if (base_pos == -1)
1231 base_pos = pos;
1232 else
1233 assert(base_pos + (int)i * 4 == pos);
1234 }
1235 }
1236 } else {
1237 tokens[1] = 0; /* unused array index */
1238 base_pos = add_matrix_ref(prog, tokens);
1239 }
1240 tokens[4] = matrices[i].modifier;
1241
1242 entry = new(mem_ctx) variable_storage(var,
1243 PROGRAM_STATE_VAR,
1244 base_pos);
1245
1246 return entry;
1247 }
1248 }
1249
1250 return NULL;
1251 }
1252
1253 int
1254 ir_to_mesa_visitor::add_uniform(const char *name,
1255 const glsl_type *type,
1256 ir_constant *constant)
1257 {
1258 int len;
1259
1260 if (type->is_vector() ||
1261 type->is_scalar()) {
1262 len = type->vector_elements;
1263 } else {
1264 len = type_size(type) * 4;
1265 }
1266
1267 float *values = NULL;
1268 if (constant && type->is_array()) {
1269 values = (float *)malloc(type->length * 4 * sizeof(float));
1270
1271 assert(type->fields.array->is_scalar() ||
1272 type->fields.array->is_vector() ||
1273 !"FINISHME: uniform array initializers for non-vector");
1274
1275 for (unsigned int i = 0; i < type->length; i++) {
1276 ir_constant *element = constant->array_elements[i];
1277 unsigned int c;
1278
1279 for (c = 0; c < type->fields.array->vector_elements; c++) {
1280 switch (type->fields.array->base_type) {
1281 case GLSL_TYPE_FLOAT:
1282 values[4 * i + c] = element->value.f[c];
1283 break;
1284 case GLSL_TYPE_INT:
1285 values[4 * i + c] = element->value.i[c];
1286 break;
1287 case GLSL_TYPE_UINT:
1288 values[4 * i + c] = element->value.u[c];
1289 break;
1290 case GLSL_TYPE_BOOL:
1291 values[4 * i + c] = element->value.b[c];
1292 break;
1293 default:
1294 assert(!"not reached");
1295 }
1296 }
1297 }
1298 } else if (constant) {
1299 values = (float *)malloc(16 * sizeof(float));
1300 for (unsigned int i = 0; i < type->components(); i++) {
1301 switch (type->base_type) {
1302 case GLSL_TYPE_FLOAT:
1303 values[i] = constant->value.f[i];
1304 break;
1305 case GLSL_TYPE_INT:
1306 values[i] = constant->value.i[i];
1307 break;
1308 case GLSL_TYPE_UINT:
1309 values[i] = constant->value.u[i];
1310 break;
1311 case GLSL_TYPE_BOOL:
1312 values[i] = constant->value.b[i];
1313 break;
1314 default:
1315 assert(!"not reached");
1316 }
1317 }
1318 }
1319
1320 int loc = _mesa_add_uniform(this->prog->Parameters,
1321 name,
1322 len,
1323 type->gl_type,
1324 values);
1325 free(values);
1326
1327 return loc;
1328 }
1329
1330 /* Recursively add all the members of the aggregate uniform as uniform names
1331 * to Mesa, moving those uniforms to our structured temporary.
1332 */
1333 void
1334 ir_to_mesa_visitor::add_aggregate_uniform(ir_instruction *ir,
1335 const char *name,
1336 const struct glsl_type *type,
1337 ir_constant *constant,
1338 struct ir_to_mesa_dst_reg temp)
1339 {
1340 int loc;
1341
1342 if (type->is_record()) {
1343 void *mem_ctx = talloc_new(NULL);
1344 ir_constant *field_constant = NULL;
1345
1346 if (constant)
1347 field_constant = (ir_constant *)constant->components.get_head();
1348
1349 for (unsigned int i = 0; i < type->length; i++) {
1350 const glsl_type *field_type = type->fields.structure[i].type;
1351
1352 add_aggregate_uniform(ir,
1353 talloc_asprintf(mem_ctx, "%s.%s", name,
1354 type->fields.structure[i].name),
1355 field_type, field_constant, temp);
1356 temp.index += type_size(field_type);
1357
1358 if (constant)
1359 field_constant = (ir_constant *)field_constant->next;
1360 }
1361
1362 talloc_free(mem_ctx);
1363
1364 return;
1365 }
1366
1367 assert(type->is_vector() || type->is_scalar() || !"FINISHME: other types");
1368
1369 loc = add_uniform(name, type, constant);
1370
1371 ir_to_mesa_src_reg uniform(PROGRAM_UNIFORM, loc, type);
1372
1373 for (int i = 0; i < type_size(type); i++) {
1374 ir_to_mesa_emit_op1(ir, OPCODE_MOV, temp, uniform);
1375 temp.index++;
1376 uniform.index++;
1377 }
1378 }
1379
1380
1381 void
1382 ir_to_mesa_visitor::visit(ir_dereference_variable *ir)
1383 {
1384 variable_storage *entry = find_variable_storage(ir->var);
1385 unsigned int loc;
1386
1387 if (!entry) {
1388 switch (ir->var->mode) {
1389 case ir_var_uniform:
1390 entry = get_builtin_matrix_ref(this->mem_ctx, this->prog, ir->var,
1391 NULL);
1392 if (entry)
1393 break;
1394
1395 /* FINISHME: Fix up uniform name for arrays and things */
1396 if (ir->var->type->base_type == GLSL_TYPE_SAMPLER) {
1397 /* FINISHME: we whack the location of the var here, which
1398 * is probably not expected. But we need to communicate
1399 * mesa's sampler number to the tex instruction.
1400 */
1401 int sampler = _mesa_add_sampler(this->prog->Parameters,
1402 ir->var->name,
1403 ir->var->type->gl_type);
1404 map_sampler(ir->var->location, sampler);
1405
1406 entry = new(mem_ctx) variable_storage(ir->var, PROGRAM_SAMPLER,
1407 sampler);
1408 this->variables.push_tail(entry);
1409 break;
1410 }
1411
1412 assert(ir->var->type->gl_type != 0 &&
1413 ir->var->type->gl_type != GL_INVALID_ENUM);
1414
1415 /* Oh, the joy of aggregate types in Mesa. Like constants,
1416 * we can only really do vec4s. So, make a temp, chop the
1417 * aggregate up into vec4s, and move those vec4s to the temp.
1418 */
1419 if (ir->var->type->is_record()) {
1420 ir_to_mesa_src_reg temp = get_temp(ir->var->type);
1421
1422 entry = new(mem_ctx) variable_storage(ir->var,
1423 temp.file,
1424 temp.index);
1425 this->variables.push_tail(entry);
1426
1427 add_aggregate_uniform(ir->var, ir->var->name, ir->var->type,
1428 ir->var->constant_value,
1429 ir_to_mesa_dst_reg_from_src(temp));
1430 break;
1431 }
1432
1433 loc = add_uniform(ir->var->name,
1434 ir->var->type,
1435 ir->var->constant_value);
1436
1437 /* Always mark the uniform used at this point. If it isn't
1438 * used, dead code elimination should have nuked the decl already.
1439 */
1440 this->prog->Parameters->Parameters[loc].Used = GL_TRUE;
1441
1442 entry = new(mem_ctx) variable_storage(ir->var, PROGRAM_UNIFORM, loc);
1443 this->variables.push_tail(entry);
1444 break;
1445 case ir_var_in:
1446 case ir_var_out:
1447 case ir_var_inout:
1448 /* The linker assigns locations for varyings and attributes,
1449 * including deprecated builtins (like gl_Color), user-assign
1450 * generic attributes (glBindVertexLocation), and
1451 * user-defined varyings.
1452 *
1453 * FINISHME: We would hit this path for function arguments. Fix!
1454 */
1455 assert(ir->var->location != -1);
1456 if (ir->var->mode == ir_var_in ||
1457 ir->var->mode == ir_var_inout) {
1458 entry = new(mem_ctx) variable_storage(ir->var,
1459 PROGRAM_INPUT,
1460 ir->var->location);
1461
1462 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
1463 ir->var->location >= VERT_ATTRIB_GENERIC0) {
1464 _mesa_add_attribute(prog->Attributes,
1465 ir->var->name,
1466 type_size(ir->var->type) * 4,
1467 ir->var->type->gl_type,
1468 ir->var->location - VERT_ATTRIB_GENERIC0);
1469 }
1470 } else {
1471 entry = new(mem_ctx) variable_storage(ir->var,
1472 PROGRAM_OUTPUT,
1473 ir->var->location);
1474 }
1475
1476 break;
1477 case ir_var_auto:
1478 case ir_var_temporary:
1479 entry = new(mem_ctx) variable_storage(ir->var, PROGRAM_TEMPORARY,
1480 this->next_temp);
1481 this->variables.push_tail(entry);
1482
1483 next_temp += type_size(ir->var->type);
1484 break;
1485 }
1486
1487 if (!entry) {
1488 printf("Failed to make storage for %s\n", ir->var->name);
1489 exit(1);
1490 }
1491 }
1492
1493 this->result = ir_to_mesa_src_reg(entry->file, entry->index, ir->var->type);
1494 }
1495
1496 void
1497 ir_to_mesa_visitor::visit(ir_dereference_array *ir)
1498 {
1499 ir_variable *var = ir->variable_referenced();
1500 ir_constant *index;
1501 ir_to_mesa_src_reg src_reg;
1502 ir_dereference_variable *deref_var = ir->array->as_dereference_variable();
1503 int element_size = type_size(ir->type);
1504
1505 index = ir->array_index->constant_expression_value();
1506
1507 if (deref_var && strncmp(deref_var->var->name,
1508 "gl_TextureMatrix",
1509 strlen("gl_TextureMatrix")) == 0) {
1510 struct variable_storage *entry;
1511
1512 entry = get_builtin_matrix_ref(this->mem_ctx, this->prog, deref_var->var,
1513 ir->array_index);
1514 assert(entry);
1515
1516 ir_to_mesa_src_reg src_reg(entry->file, entry->index, ir->type);
1517
1518 if (index) {
1519 src_reg.reladdr = NULL;
1520 } else {
1521 ir_to_mesa_src_reg index_reg = get_temp(glsl_type::float_type);
1522
1523 ir->array_index->accept(this);
1524 ir_to_mesa_emit_op2(ir, OPCODE_MUL,
1525 ir_to_mesa_dst_reg_from_src(index_reg),
1526 this->result, src_reg_for_float(element_size));
1527
1528 src_reg.reladdr = talloc(mem_ctx, ir_to_mesa_src_reg);
1529 memcpy(src_reg.reladdr, &index_reg, sizeof(index_reg));
1530 }
1531
1532 this->result = src_reg;
1533 return;
1534 }
1535
1536 if (strncmp(var->name, "gl_", 3) == 0 && var->mode == ir_var_uniform &&
1537 !var->type->is_matrix()) {
1538 ir_dereference_record *record = NULL;
1539 if (ir->array->ir_type == ir_type_dereference_record)
1540 record = (ir_dereference_record *)ir->array;
1541
1542 assert(index || !"FINISHME: variable-indexed builtin uniform access");
1543
1544 this->result = get_builtin_uniform_reg(prog,
1545 var->name,
1546 index->value.i[0],
1547 record ? record->field : NULL);
1548 }
1549
1550 ir->array->accept(this);
1551 src_reg = this->result;
1552
1553 if (index) {
1554 src_reg.index += index->value.i[0] * element_size;
1555 } else {
1556 ir_to_mesa_src_reg array_base = this->result;
1557 /* Variable index array dereference. It eats the "vec4" of the
1558 * base of the array and an index that offsets the Mesa register
1559 * index.
1560 */
1561 ir->array_index->accept(this);
1562
1563 ir_to_mesa_src_reg index_reg;
1564
1565 if (element_size == 1) {
1566 index_reg = this->result;
1567 } else {
1568 index_reg = get_temp(glsl_type::float_type);
1569
1570 ir_to_mesa_emit_op2(ir, OPCODE_MUL,
1571 ir_to_mesa_dst_reg_from_src(index_reg),
1572 this->result, src_reg_for_float(element_size));
1573 }
1574
1575 src_reg.reladdr = talloc(mem_ctx, ir_to_mesa_src_reg);
1576 memcpy(src_reg.reladdr, &index_reg, sizeof(index_reg));
1577 }
1578
1579 /* If the type is smaller than a vec4, replicate the last channel out. */
1580 if (ir->type->is_scalar() || ir->type->is_vector())
1581 src_reg.swizzle = swizzle_for_size(ir->type->vector_elements);
1582 else
1583 src_reg.swizzle = SWIZZLE_NOOP;
1584
1585 this->result = src_reg;
1586 }
1587
1588 void
1589 ir_to_mesa_visitor::visit(ir_dereference_record *ir)
1590 {
1591 unsigned int i;
1592 const glsl_type *struct_type = ir->record->type;
1593 int offset = 0;
1594 ir_variable *var = ir->record->variable_referenced();
1595
1596 if (strncmp(var->name, "gl_", 3) == 0 && var->mode == ir_var_uniform) {
1597 assert(var);
1598
1599 this->result = get_builtin_uniform_reg(prog,
1600 var->name,
1601 0,
1602 ir->field);
1603 return;
1604 }
1605
1606 ir->record->accept(this);
1607
1608 for (i = 0; i < struct_type->length; i++) {
1609 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1610 break;
1611 offset += type_size(struct_type->fields.structure[i].type);
1612 }
1613 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1614 this->result.index += offset;
1615 }
1616
1617 /**
1618 * We want to be careful in assignment setup to hit the actual storage
1619 * instead of potentially using a temporary like we might with the
1620 * ir_dereference handler.
1621 *
1622 * Thanks to ir_swizzle_swizzle, and ir_vec_index_to_swizzle, we
1623 * should only see potentially one variable array index of a vector,
1624 * and one swizzle, before getting to actual vec4 storage. So handle
1625 * those, then go use ir_dereference to handle the rest.
1626 */
1627 static struct ir_to_mesa_dst_reg
1628 get_assignment_lhs(ir_instruction *ir, ir_to_mesa_visitor *v,
1629 ir_to_mesa_src_reg *r)
1630 {
1631 struct ir_to_mesa_dst_reg dst_reg;
1632 ir_swizzle *swiz;
1633
1634 ir_dereference_array *deref_array = ir->as_dereference_array();
1635 /* This should have been handled by ir_vec_index_to_cond_assign */
1636 if (deref_array) {
1637 assert(!deref_array->array->type->is_vector());
1638 }
1639
1640 /* Use the rvalue deref handler for the most part. We'll ignore
1641 * swizzles in it and write swizzles using writemask, though.
1642 */
1643 ir->accept(v);
1644 dst_reg = ir_to_mesa_dst_reg_from_src(v->result);
1645
1646 if ((swiz = ir->as_swizzle())) {
1647 int swizzles[4] = {
1648 swiz->mask.x,
1649 swiz->mask.y,
1650 swiz->mask.z,
1651 swiz->mask.w
1652 };
1653 int new_r_swizzle[4];
1654 int orig_r_swizzle = r->swizzle;
1655 int i;
1656
1657 for (i = 0; i < 4; i++) {
1658 new_r_swizzle[i] = GET_SWZ(orig_r_swizzle, 0);
1659 }
1660
1661 dst_reg.writemask = 0;
1662 for (i = 0; i < 4; i++) {
1663 if (i < swiz->mask.num_components) {
1664 dst_reg.writemask |= 1 << swizzles[i];
1665 new_r_swizzle[swizzles[i]] = GET_SWZ(orig_r_swizzle, i);
1666 }
1667 }
1668
1669 r->swizzle = MAKE_SWIZZLE4(new_r_swizzle[0],
1670 new_r_swizzle[1],
1671 new_r_swizzle[2],
1672 new_r_swizzle[3]);
1673 }
1674
1675 return dst_reg;
1676 }
1677
1678 void
1679 ir_to_mesa_visitor::visit(ir_assignment *ir)
1680 {
1681 struct ir_to_mesa_dst_reg l;
1682 struct ir_to_mesa_src_reg r;
1683 int i;
1684
1685 ir->rhs->accept(this);
1686 r = this->result;
1687
1688 l = get_assignment_lhs(ir->lhs, this, &r);
1689
1690 assert(l.file != PROGRAM_UNDEFINED);
1691 assert(r.file != PROGRAM_UNDEFINED);
1692
1693 if (ir->condition) {
1694 ir_to_mesa_src_reg condition;
1695
1696 ir->condition->accept(this);
1697 condition = this->result;
1698
1699 /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves,
1700 * and the condition we produced is 0.0 or 1.0. By flipping the
1701 * sign, we can choose which value OPCODE_CMP produces without
1702 * an extra computing the condition.
1703 */
1704 condition.negate = ~condition.negate;
1705 for (i = 0; i < type_size(ir->lhs->type); i++) {
1706 ir_to_mesa_emit_op3(ir, OPCODE_CMP, l,
1707 condition, r, ir_to_mesa_src_reg_from_dst(l));
1708 l.index++;
1709 r.index++;
1710 }
1711 } else {
1712 for (i = 0; i < type_size(ir->lhs->type); i++) {
1713 ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r);
1714 l.index++;
1715 r.index++;
1716 }
1717 }
1718 }
1719
1720
1721 void
1722 ir_to_mesa_visitor::visit(ir_constant *ir)
1723 {
1724 ir_to_mesa_src_reg src_reg;
1725 GLfloat stack_vals[4];
1726 GLfloat *values = stack_vals;
1727 unsigned int i;
1728
1729 /* Unfortunately, 4 floats is all we can get into
1730 * _mesa_add_unnamed_constant. So, make a temp to store an
1731 * aggregate constant and move each constant value into it. If we
1732 * get lucky, copy propagation will eliminate the extra moves.
1733 */
1734
1735 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1736 ir_to_mesa_src_reg temp_base = get_temp(ir->type);
1737 ir_to_mesa_dst_reg temp = ir_to_mesa_dst_reg_from_src(temp_base);
1738
1739 foreach_iter(exec_list_iterator, iter, ir->components) {
1740 ir_constant *field_value = (ir_constant *)iter.get();
1741 int size = type_size(field_value->type);
1742
1743 assert(size > 0);
1744
1745 field_value->accept(this);
1746 src_reg = this->result;
1747
1748 for (i = 0; i < (unsigned int)size; i++) {
1749 ir_to_mesa_emit_op1(ir, OPCODE_MOV, temp, src_reg);
1750
1751 src_reg.index++;
1752 temp.index++;
1753 }
1754 }
1755 this->result = temp_base;
1756 return;
1757 }
1758
1759 if (ir->type->is_array()) {
1760 ir_to_mesa_src_reg temp_base = get_temp(ir->type);
1761 ir_to_mesa_dst_reg temp = ir_to_mesa_dst_reg_from_src(temp_base);
1762 int size = type_size(ir->type->fields.array);
1763
1764 assert(size > 0);
1765
1766 for (i = 0; i < ir->type->length; i++) {
1767 ir->array_elements[i]->accept(this);
1768 src_reg = this->result;
1769 for (int j = 0; j < size; j++) {
1770 ir_to_mesa_emit_op1(ir, OPCODE_MOV, temp, src_reg);
1771
1772 src_reg.index++;
1773 temp.index++;
1774 }
1775 }
1776 this->result = temp_base;
1777 return;
1778 }
1779
1780 if (ir->type->is_matrix()) {
1781 ir_to_mesa_src_reg mat = get_temp(ir->type);
1782 ir_to_mesa_dst_reg mat_column = ir_to_mesa_dst_reg_from_src(mat);
1783
1784 for (i = 0; i < ir->type->matrix_columns; i++) {
1785 assert(ir->type->base_type == GLSL_TYPE_FLOAT);
1786 values = &ir->value.f[i * ir->type->vector_elements];
1787
1788 src_reg = ir_to_mesa_src_reg(PROGRAM_CONSTANT, -1, NULL);
1789 src_reg.index = _mesa_add_unnamed_constant(this->prog->Parameters,
1790 values,
1791 ir->type->vector_elements,
1792 &src_reg.swizzle);
1793 ir_to_mesa_emit_op1(ir, OPCODE_MOV, mat_column, src_reg);
1794
1795 mat_column.index++;
1796 }
1797
1798 this->result = mat;
1799 }
1800
1801 src_reg.file = PROGRAM_CONSTANT;
1802 switch (ir->type->base_type) {
1803 case GLSL_TYPE_FLOAT:
1804 values = &ir->value.f[0];
1805 break;
1806 case GLSL_TYPE_UINT:
1807 for (i = 0; i < ir->type->vector_elements; i++) {
1808 values[i] = ir->value.u[i];
1809 }
1810 break;
1811 case GLSL_TYPE_INT:
1812 for (i = 0; i < ir->type->vector_elements; i++) {
1813 values[i] = ir->value.i[i];
1814 }
1815 break;
1816 case GLSL_TYPE_BOOL:
1817 for (i = 0; i < ir->type->vector_elements; i++) {
1818 values[i] = ir->value.b[i];
1819 }
1820 break;
1821 default:
1822 assert(!"Non-float/uint/int/bool constant");
1823 }
1824
1825 this->result = ir_to_mesa_src_reg(PROGRAM_CONSTANT, -1, ir->type);
1826 this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters,
1827 values,
1828 ir->type->vector_elements,
1829 &this->result.swizzle);
1830 }
1831
1832 function_entry *
1833 ir_to_mesa_visitor::get_function_signature(ir_function_signature *sig)
1834 {
1835 function_entry *entry;
1836
1837 foreach_iter(exec_list_iterator, iter, this->function_signatures) {
1838 entry = (function_entry *)iter.get();
1839
1840 if (entry->sig == sig)
1841 return entry;
1842 }
1843
1844 entry = talloc(mem_ctx, function_entry);
1845 entry->sig = sig;
1846 entry->sig_id = this->next_signature_id++;
1847 entry->bgn_inst = NULL;
1848
1849 /* Allocate storage for all the parameters. */
1850 foreach_iter(exec_list_iterator, iter, sig->parameters) {
1851 ir_variable *param = (ir_variable *)iter.get();
1852 variable_storage *storage;
1853
1854 storage = find_variable_storage(param);
1855 assert(!storage);
1856
1857 storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY,
1858 this->next_temp);
1859 this->variables.push_tail(storage);
1860
1861 this->next_temp += type_size(param->type);
1862 }
1863
1864 if (!sig->return_type->is_void()) {
1865 entry->return_reg = get_temp(sig->return_type);
1866 } else {
1867 entry->return_reg = ir_to_mesa_undef;
1868 }
1869
1870 this->function_signatures.push_tail(entry);
1871 return entry;
1872 }
1873
1874 void
1875 ir_to_mesa_visitor::visit(ir_call *ir)
1876 {
1877 ir_to_mesa_instruction *call_inst;
1878 ir_function_signature *sig = ir->get_callee();
1879 function_entry *entry = get_function_signature(sig);
1880 int i;
1881
1882 /* Process in parameters. */
1883 exec_list_iterator sig_iter = sig->parameters.iterator();
1884 foreach_iter(exec_list_iterator, iter, *ir) {
1885 ir_rvalue *param_rval = (ir_rvalue *)iter.get();
1886 ir_variable *param = (ir_variable *)sig_iter.get();
1887
1888 if (param->mode == ir_var_in ||
1889 param->mode == ir_var_inout) {
1890 variable_storage *storage = find_variable_storage(param);
1891 assert(storage);
1892
1893 param_rval->accept(this);
1894 ir_to_mesa_src_reg r = this->result;
1895
1896 ir_to_mesa_dst_reg l;
1897 l.file = storage->file;
1898 l.index = storage->index;
1899 l.reladdr = NULL;
1900 l.writemask = WRITEMASK_XYZW;
1901 l.cond_mask = COND_TR;
1902
1903 for (i = 0; i < type_size(param->type); i++) {
1904 ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r);
1905 l.index++;
1906 r.index++;
1907 }
1908 }
1909
1910 sig_iter.next();
1911 }
1912 assert(!sig_iter.has_next());
1913
1914 /* Emit call instruction */
1915 call_inst = ir_to_mesa_emit_op1(ir, OPCODE_CAL,
1916 ir_to_mesa_undef_dst, ir_to_mesa_undef);
1917 call_inst->function = entry;
1918
1919 /* Process out parameters. */
1920 sig_iter = sig->parameters.iterator();
1921 foreach_iter(exec_list_iterator, iter, *ir) {
1922 ir_rvalue *param_rval = (ir_rvalue *)iter.get();
1923 ir_variable *param = (ir_variable *)sig_iter.get();
1924
1925 if (param->mode == ir_var_out ||
1926 param->mode == ir_var_inout) {
1927 variable_storage *storage = find_variable_storage(param);
1928 assert(storage);
1929
1930 ir_to_mesa_src_reg r;
1931 r.file = storage->file;
1932 r.index = storage->index;
1933 r.reladdr = NULL;
1934 r.swizzle = SWIZZLE_NOOP;
1935 r.negate = 0;
1936
1937 param_rval->accept(this);
1938 ir_to_mesa_dst_reg l = ir_to_mesa_dst_reg_from_src(this->result);
1939
1940 for (i = 0; i < type_size(param->type); i++) {
1941 ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r);
1942 l.index++;
1943 r.index++;
1944 }
1945 }
1946
1947 sig_iter.next();
1948 }
1949 assert(!sig_iter.has_next());
1950
1951 /* Process return value. */
1952 this->result = entry->return_reg;
1953 }
1954
1955
1956 void
1957 ir_to_mesa_visitor::visit(ir_texture *ir)
1958 {
1959 ir_to_mesa_src_reg result_src, coord, lod_info, projector;
1960 ir_to_mesa_dst_reg result_dst, coord_dst;
1961 ir_to_mesa_instruction *inst = NULL;
1962 prog_opcode opcode = OPCODE_NOP;
1963
1964 ir->coordinate->accept(this);
1965
1966 /* Put our coords in a temp. We'll need to modify them for shadow,
1967 * projection, or LOD, so the only case we'd use it as is is if
1968 * we're doing plain old texturing. Mesa IR optimization should
1969 * handle cleaning up our mess in that case.
1970 */
1971 coord = get_temp(glsl_type::vec4_type);
1972 coord_dst = ir_to_mesa_dst_reg_from_src(coord);
1973 ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst,
1974 this->result);
1975
1976 if (ir->projector) {
1977 ir->projector->accept(this);
1978 projector = this->result;
1979 }
1980
1981 /* Storage for our result. Ideally for an assignment we'd be using
1982 * the actual storage for the result here, instead.
1983 */
1984 result_src = get_temp(glsl_type::vec4_type);
1985 result_dst = ir_to_mesa_dst_reg_from_src(result_src);
1986
1987 switch (ir->op) {
1988 case ir_tex:
1989 opcode = OPCODE_TEX;
1990 break;
1991 case ir_txb:
1992 opcode = OPCODE_TXB;
1993 ir->lod_info.bias->accept(this);
1994 lod_info = this->result;
1995 break;
1996 case ir_txl:
1997 opcode = OPCODE_TXL;
1998 ir->lod_info.lod->accept(this);
1999 lod_info = this->result;
2000 break;
2001 case ir_txd:
2002 case ir_txf:
2003 assert(!"GLSL 1.30 features unsupported");
2004 break;
2005 }
2006
2007 if (ir->projector) {
2008 if (opcode == OPCODE_TEX) {
2009 /* Slot the projector in as the last component of the coord. */
2010 coord_dst.writemask = WRITEMASK_W;
2011 ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, projector);
2012 coord_dst.writemask = WRITEMASK_XYZW;
2013 opcode = OPCODE_TXP;
2014 } else {
2015 ir_to_mesa_src_reg coord_w = coord;
2016 coord_w.swizzle = SWIZZLE_WWWW;
2017
2018 /* For the other TEX opcodes there's no projective version
2019 * since the last slot is taken up by lod info. Do the
2020 * projective divide now.
2021 */
2022 coord_dst.writemask = WRITEMASK_W;
2023 ir_to_mesa_emit_op1(ir, OPCODE_RCP, coord_dst, projector);
2024
2025 coord_dst.writemask = WRITEMASK_XYZ;
2026 ir_to_mesa_emit_op2(ir, OPCODE_MUL, coord_dst, coord, coord_w);
2027
2028 coord_dst.writemask = WRITEMASK_XYZW;
2029 coord.swizzle = SWIZZLE_XYZW;
2030 }
2031 }
2032
2033 if (ir->shadow_comparitor) {
2034 /* Slot the shadow value in as the second to last component of the
2035 * coord.
2036 */
2037 ir->shadow_comparitor->accept(this);
2038 coord_dst.writemask = WRITEMASK_Z;
2039 ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, this->result);
2040 coord_dst.writemask = WRITEMASK_XYZW;
2041 }
2042
2043 if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) {
2044 /* Mesa IR stores lod or lod bias in the last channel of the coords. */
2045 coord_dst.writemask = WRITEMASK_W;
2046 ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, lod_info);
2047 coord_dst.writemask = WRITEMASK_XYZW;
2048 }
2049
2050 inst = ir_to_mesa_emit_op1(ir, opcode, result_dst, coord);
2051
2052 if (ir->shadow_comparitor)
2053 inst->tex_shadow = GL_TRUE;
2054
2055 ir_dereference_variable *sampler = ir->sampler->as_dereference_variable();
2056 assert(sampler); /* FINISHME: sampler arrays */
2057 /* generate the mapping, remove when we generate storage at
2058 * declaration time
2059 */
2060 sampler->accept(this);
2061
2062 inst->sampler = get_sampler_number(sampler->var->location);
2063
2064 switch (sampler->type->sampler_dimensionality) {
2065 case GLSL_SAMPLER_DIM_1D:
2066 inst->tex_target = TEXTURE_1D_INDEX;
2067 break;
2068 case GLSL_SAMPLER_DIM_2D:
2069 inst->tex_target = TEXTURE_2D_INDEX;
2070 break;
2071 case GLSL_SAMPLER_DIM_3D:
2072 inst->tex_target = TEXTURE_3D_INDEX;
2073 break;
2074 case GLSL_SAMPLER_DIM_CUBE:
2075 inst->tex_target = TEXTURE_CUBE_INDEX;
2076 break;
2077 default:
2078 assert(!"FINISHME: other texture targets");
2079 }
2080
2081 this->result = result_src;
2082 }
2083
2084 void
2085 ir_to_mesa_visitor::visit(ir_return *ir)
2086 {
2087 assert(current_function);
2088
2089 if (ir->get_value()) {
2090 ir_to_mesa_dst_reg l;
2091 int i;
2092
2093 ir->get_value()->accept(this);
2094 ir_to_mesa_src_reg r = this->result;
2095
2096 l = ir_to_mesa_dst_reg_from_src(current_function->return_reg);
2097
2098 for (i = 0; i < type_size(current_function->sig->return_type); i++) {
2099 ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r);
2100 l.index++;
2101 r.index++;
2102 }
2103 }
2104
2105 ir_to_mesa_emit_op0(ir, OPCODE_RET);
2106 }
2107
2108 void
2109 ir_to_mesa_visitor::visit(ir_discard *ir)
2110 {
2111 assert(ir->condition == NULL); /* FINISHME */
2112
2113 ir_to_mesa_emit_op0(ir, OPCODE_KIL_NV);
2114 }
2115
2116 void
2117 ir_to_mesa_visitor::visit(ir_if *ir)
2118 {
2119 ir_to_mesa_instruction *cond_inst, *if_inst, *else_inst = NULL;
2120 ir_to_mesa_instruction *prev_inst;
2121
2122 prev_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
2123
2124 ir->condition->accept(this);
2125 assert(this->result.file != PROGRAM_UNDEFINED);
2126
2127 if (ctx->Shader.EmitCondCodes) {
2128 cond_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
2129
2130 /* See if we actually generated any instruction for generating
2131 * the condition. If not, then cook up a move to a temp so we
2132 * have something to set cond_update on.
2133 */
2134 if (cond_inst == prev_inst) {
2135 ir_to_mesa_src_reg temp = get_temp(glsl_type::bool_type);
2136 cond_inst = ir_to_mesa_emit_op1(ir->condition, OPCODE_MOV,
2137 ir_to_mesa_dst_reg_from_src(temp),
2138 result);
2139 }
2140 cond_inst->cond_update = GL_TRUE;
2141
2142 if_inst = ir_to_mesa_emit_op0(ir->condition, OPCODE_IF);
2143 if_inst->dst_reg.cond_mask = COND_NE;
2144 } else {
2145 if_inst = ir_to_mesa_emit_op1(ir->condition,
2146 OPCODE_IF, ir_to_mesa_undef_dst,
2147 this->result);
2148 }
2149
2150 this->instructions.push_tail(if_inst);
2151
2152 visit_exec_list(&ir->then_instructions, this);
2153
2154 if (!ir->else_instructions.is_empty()) {
2155 else_inst = ir_to_mesa_emit_op0(ir->condition, OPCODE_ELSE);
2156 visit_exec_list(&ir->else_instructions, this);
2157 }
2158
2159 if_inst = ir_to_mesa_emit_op1(ir->condition, OPCODE_ENDIF,
2160 ir_to_mesa_undef_dst, ir_to_mesa_undef);
2161 }
2162
2163 ir_to_mesa_visitor::ir_to_mesa_visitor()
2164 {
2165 result.file = PROGRAM_UNDEFINED;
2166 next_temp = 1;
2167 next_signature_id = 1;
2168 sampler_map = NULL;
2169 sampler_map_size = 0;
2170 current_function = NULL;
2171 }
2172
2173 static struct prog_src_register
2174 mesa_src_reg_from_ir_src_reg(ir_to_mesa_src_reg reg)
2175 {
2176 struct prog_src_register mesa_reg;
2177
2178 mesa_reg.File = reg.file;
2179 assert(reg.index < (1 << INST_INDEX_BITS) - 1);
2180 mesa_reg.Index = reg.index;
2181 mesa_reg.Swizzle = reg.swizzle;
2182 mesa_reg.RelAddr = reg.reladdr != NULL;
2183 mesa_reg.Negate = reg.negate;
2184 mesa_reg.Abs = 0;
2185 mesa_reg.HasIndex2 = GL_FALSE;
2186
2187 return mesa_reg;
2188 }
2189
2190 static void
2191 set_branchtargets(ir_to_mesa_visitor *v,
2192 struct prog_instruction *mesa_instructions,
2193 int num_instructions)
2194 {
2195 int if_count = 0, loop_count = 0;
2196 int *if_stack, *loop_stack;
2197 int if_stack_pos = 0, loop_stack_pos = 0;
2198 int i, j;
2199
2200 for (i = 0; i < num_instructions; i++) {
2201 switch (mesa_instructions[i].Opcode) {
2202 case OPCODE_IF:
2203 if_count++;
2204 break;
2205 case OPCODE_BGNLOOP:
2206 loop_count++;
2207 break;
2208 case OPCODE_BRK:
2209 case OPCODE_CONT:
2210 mesa_instructions[i].BranchTarget = -1;
2211 break;
2212 default:
2213 break;
2214 }
2215 }
2216
2217 if_stack = (int *)calloc(if_count, sizeof(*if_stack));
2218 loop_stack = (int *)calloc(loop_count, sizeof(*loop_stack));
2219
2220 for (i = 0; i < num_instructions; i++) {
2221 switch (mesa_instructions[i].Opcode) {
2222 case OPCODE_IF:
2223 if_stack[if_stack_pos] = i;
2224 if_stack_pos++;
2225 break;
2226 case OPCODE_ELSE:
2227 mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
2228 if_stack[if_stack_pos - 1] = i;
2229 break;
2230 case OPCODE_ENDIF:
2231 mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
2232 if_stack_pos--;
2233 break;
2234 case OPCODE_BGNLOOP:
2235 loop_stack[loop_stack_pos] = i;
2236 loop_stack_pos++;
2237 break;
2238 case OPCODE_ENDLOOP:
2239 loop_stack_pos--;
2240 /* Rewrite any breaks/conts at this nesting level (haven't
2241 * already had a BranchTarget assigned) to point to the end
2242 * of the loop.
2243 */
2244 for (j = loop_stack[loop_stack_pos]; j < i; j++) {
2245 if (mesa_instructions[j].Opcode == OPCODE_BRK ||
2246 mesa_instructions[j].Opcode == OPCODE_CONT) {
2247 if (mesa_instructions[j].BranchTarget == -1) {
2248 mesa_instructions[j].BranchTarget = i;
2249 }
2250 }
2251 }
2252 /* The loop ends point at each other. */
2253 mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos];
2254 mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i;
2255 break;
2256 case OPCODE_CAL:
2257 foreach_iter(exec_list_iterator, iter, v->function_signatures) {
2258 function_entry *entry = (function_entry *)iter.get();
2259
2260 if (entry->sig_id == mesa_instructions[i].BranchTarget) {
2261 mesa_instructions[i].BranchTarget = entry->inst;
2262 break;
2263 }
2264 }
2265 break;
2266 default:
2267 break;
2268 }
2269 }
2270
2271 free(if_stack);
2272 }
2273
2274 static void
2275 print_program(struct prog_instruction *mesa_instructions,
2276 ir_instruction **mesa_instruction_annotation,
2277 int num_instructions)
2278 {
2279 ir_instruction *last_ir = NULL;
2280 int i;
2281 int indent = 0;
2282
2283 for (i = 0; i < num_instructions; i++) {
2284 struct prog_instruction *mesa_inst = mesa_instructions + i;
2285 ir_instruction *ir = mesa_instruction_annotation[i];
2286
2287 fprintf(stdout, "%3d: ", i);
2288
2289 if (last_ir != ir && ir) {
2290 int j;
2291
2292 for (j = 0; j < indent; j++) {
2293 fprintf(stdout, " ");
2294 }
2295 ir->print();
2296 printf("\n");
2297 last_ir = ir;
2298
2299 fprintf(stdout, " "); /* line number spacing. */
2300 }
2301
2302 indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent,
2303 PROG_PRINT_DEBUG, NULL);
2304 }
2305 }
2306
2307 static void
2308 mark_input(struct gl_program *prog,
2309 int index,
2310 GLboolean reladdr)
2311 {
2312 prog->InputsRead |= BITFIELD64_BIT(index);
2313 int i;
2314
2315 if (reladdr) {
2316 if (index >= FRAG_ATTRIB_TEX0 && index <= FRAG_ATTRIB_TEX7) {
2317 for (i = 0; i < 8; i++) {
2318 prog->InputsRead |= BITFIELD64_BIT(FRAG_ATTRIB_TEX0 + i);
2319 }
2320 } else {
2321 assert(!"FINISHME: Mark InputsRead for varying arrays");
2322 }
2323 }
2324 }
2325
2326 static void
2327 mark_output(struct gl_program *prog,
2328 int index,
2329 GLboolean reladdr)
2330 {
2331 prog->OutputsWritten |= BITFIELD64_BIT(index);
2332 int i;
2333
2334 if (reladdr) {
2335 if (index >= VERT_RESULT_TEX0 && index <= VERT_RESULT_TEX7) {
2336 for (i = 0; i < 8; i++) {
2337 prog->OutputsWritten |= BITFIELD64_BIT(FRAG_ATTRIB_TEX0 + i);
2338 }
2339 } else {
2340 assert(!"FINISHME: Mark OutputsWritten for varying arrays");
2341 }
2342 }
2343 }
2344
2345 static void
2346 count_resources(struct gl_program *prog)
2347 {
2348 unsigned int i;
2349
2350 prog->InputsRead = 0;
2351 prog->OutputsWritten = 0;
2352 prog->SamplersUsed = 0;
2353
2354 for (i = 0; i < prog->NumInstructions; i++) {
2355 struct prog_instruction *inst = &prog->Instructions[i];
2356 unsigned int reg;
2357
2358 switch (inst->DstReg.File) {
2359 case PROGRAM_OUTPUT:
2360 mark_output(prog, inst->DstReg.Index, inst->DstReg.RelAddr);
2361 break;
2362 case PROGRAM_INPUT:
2363 mark_input(prog, inst->DstReg.Index, inst->DstReg.RelAddr);
2364 break;
2365 default:
2366 break;
2367 }
2368
2369 for (reg = 0; reg < _mesa_num_inst_src_regs(inst->Opcode); reg++) {
2370 switch (inst->SrcReg[reg].File) {
2371 case PROGRAM_OUTPUT:
2372 mark_output(prog, inst->SrcReg[reg].Index,
2373 inst->SrcReg[reg].RelAddr);
2374 break;
2375 case PROGRAM_INPUT:
2376 mark_input(prog, inst->SrcReg[reg].Index, inst->SrcReg[reg].RelAddr);
2377 break;
2378 default:
2379 break;
2380 }
2381 }
2382
2383 /* Instead of just using the uniform's value to map to a
2384 * sampler, Mesa first allocates a separate number for the
2385 * sampler (_mesa_add_sampler), then we reindex it down to a
2386 * small integer (sampler_map[], SamplersUsed), then that gets
2387 * mapped to the uniform's value, and we get an actual sampler.
2388 */
2389 if (_mesa_is_tex_instruction(inst->Opcode)) {
2390 prog->SamplerTargets[inst->TexSrcUnit] =
2391 (gl_texture_index)inst->TexSrcTarget;
2392 prog->SamplersUsed |= 1 << inst->TexSrcUnit;
2393 if (inst->TexShadow) {
2394 prog->ShadowSamplers |= 1 << inst->TexSrcUnit;
2395 }
2396 }
2397 }
2398
2399 _mesa_update_shader_textures_used(prog);
2400 }
2401
2402 /* Each stage has some uniforms in its Parameters list. The Uniforms
2403 * list for the linked shader program has a pointer to these uniforms
2404 * in each of the stage's Parameters list, so that their values can be
2405 * updated when a uniform is set.
2406 */
2407 static void
2408 link_uniforms_to_shared_uniform_list(struct gl_uniform_list *uniforms,
2409 struct gl_program *prog)
2410 {
2411 unsigned int i;
2412
2413 for (i = 0; i < prog->Parameters->NumParameters; i++) {
2414 const struct gl_program_parameter *p = prog->Parameters->Parameters + i;
2415
2416 if (p->Type == PROGRAM_UNIFORM || p->Type == PROGRAM_SAMPLER) {
2417 struct gl_uniform *uniform =
2418 _mesa_append_uniform(uniforms, p->Name, prog->Target, i);
2419 if (uniform)
2420 uniform->Initialized = p->Initialized;
2421 }
2422 }
2423 }
2424
2425 struct gl_program *
2426 get_mesa_program(GLcontext *ctx, struct gl_shader_program *shader_program,
2427 struct gl_shader *shader)
2428 {
2429 void *mem_ctx = shader_program;
2430 ir_to_mesa_visitor v;
2431 struct prog_instruction *mesa_instructions, *mesa_inst;
2432 ir_instruction **mesa_instruction_annotation;
2433 int i;
2434 struct gl_program *prog;
2435 GLenum target;
2436 const char *target_string;
2437 GLboolean progress;
2438
2439 switch (shader->Type) {
2440 case GL_VERTEX_SHADER:
2441 target = GL_VERTEX_PROGRAM_ARB;
2442 target_string = "vertex";
2443 break;
2444 case GL_FRAGMENT_SHADER:
2445 target = GL_FRAGMENT_PROGRAM_ARB;
2446 target_string = "fragment";
2447 break;
2448 default:
2449 assert(!"should not be reached");
2450 break;
2451 }
2452
2453 validate_ir_tree(shader->ir);
2454
2455 prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
2456 if (!prog)
2457 return NULL;
2458 prog->Parameters = _mesa_new_parameter_list();
2459 prog->Varying = _mesa_new_parameter_list();
2460 prog->Attributes = _mesa_new_parameter_list();
2461 v.ctx = ctx;
2462 v.prog = prog;
2463
2464 v.mem_ctx = talloc_new(NULL);
2465
2466 /* Emit Mesa IR for main(). */
2467 visit_exec_list(shader->ir, &v);
2468 v.ir_to_mesa_emit_op0(NULL, OPCODE_END);
2469
2470 /* Now emit bodies for any functions that were used. */
2471 do {
2472 progress = GL_FALSE;
2473
2474 foreach_iter(exec_list_iterator, iter, v.function_signatures) {
2475 function_entry *entry = (function_entry *)iter.get();
2476
2477 if (!entry->bgn_inst) {
2478 v.current_function = entry;
2479
2480 entry->bgn_inst = v.ir_to_mesa_emit_op0(NULL, OPCODE_BGNSUB);
2481 entry->bgn_inst->function = entry;
2482
2483 visit_exec_list(&entry->sig->body, &v);
2484
2485 ir_to_mesa_instruction *last;
2486 last = (ir_to_mesa_instruction *)v.instructions.get_tail();
2487 if (last->op != OPCODE_RET)
2488 v.ir_to_mesa_emit_op0(NULL, OPCODE_RET);
2489
2490 ir_to_mesa_instruction *end;
2491 end = v.ir_to_mesa_emit_op0(NULL, OPCODE_ENDSUB);
2492 end->function = entry;
2493
2494 progress = GL_TRUE;
2495 }
2496 }
2497 } while (progress);
2498
2499 prog->NumTemporaries = v.next_temp;
2500
2501 int num_instructions = 0;
2502 foreach_iter(exec_list_iterator, iter, v.instructions) {
2503 num_instructions++;
2504 }
2505
2506 mesa_instructions =
2507 (struct prog_instruction *)calloc(num_instructions,
2508 sizeof(*mesa_instructions));
2509 mesa_instruction_annotation = talloc_array(mem_ctx, ir_instruction *,
2510 num_instructions);
2511
2512 mesa_inst = mesa_instructions;
2513 i = 0;
2514 foreach_iter(exec_list_iterator, iter, v.instructions) {
2515 ir_to_mesa_instruction *inst = (ir_to_mesa_instruction *)iter.get();
2516
2517 mesa_inst->Opcode = inst->op;
2518 mesa_inst->CondUpdate = inst->cond_update;
2519 mesa_inst->DstReg.File = inst->dst_reg.file;
2520 mesa_inst->DstReg.Index = inst->dst_reg.index;
2521 mesa_inst->DstReg.CondMask = inst->dst_reg.cond_mask;
2522 mesa_inst->DstReg.WriteMask = inst->dst_reg.writemask;
2523 mesa_inst->DstReg.RelAddr = inst->dst_reg.reladdr != NULL;
2524 mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src_reg[0]);
2525 mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src_reg[1]);
2526 mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src_reg[2]);
2527 mesa_inst->TexSrcUnit = inst->sampler;
2528 mesa_inst->TexSrcTarget = inst->tex_target;
2529 mesa_inst->TexShadow = inst->tex_shadow;
2530 mesa_instruction_annotation[i] = inst->ir;
2531
2532 if (ctx->Shader.EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) {
2533 shader_program->InfoLog =
2534 talloc_asprintf_append(shader_program->InfoLog,
2535 "Couldn't flatten if statement\n");
2536 shader_program->LinkStatus = false;
2537 }
2538
2539 switch (mesa_inst->Opcode) {
2540 case OPCODE_BGNSUB:
2541 inst->function->inst = i;
2542 mesa_inst->Comment = strdup(inst->function->sig->function_name());
2543 break;
2544 case OPCODE_ENDSUB:
2545 mesa_inst->Comment = strdup(inst->function->sig->function_name());
2546 break;
2547 case OPCODE_CAL:
2548 mesa_inst->BranchTarget = inst->function->sig_id; /* rewritten later */
2549 break;
2550 case OPCODE_ARL:
2551 prog->NumAddressRegs = 1;
2552 break;
2553 default:
2554 break;
2555 }
2556
2557 mesa_inst++;
2558 i++;
2559 }
2560
2561 set_branchtargets(&v, mesa_instructions, num_instructions);
2562 if (ctx->Shader.Flags & GLSL_DUMP) {
2563 printf("Mesa %s program:\n", target_string);
2564 print_program(mesa_instructions, mesa_instruction_annotation,
2565 num_instructions);
2566 }
2567
2568 prog->Instructions = mesa_instructions;
2569 prog->NumInstructions = num_instructions;
2570
2571 _mesa_reference_program(ctx, &shader->Program, prog);
2572
2573 if ((ctx->Shader.Flags & GLSL_NO_OPT) == 0) {
2574 _mesa_optimize_program(ctx, prog);
2575 }
2576
2577 return prog;
2578 }
2579
2580 extern "C" {
2581
2582 void
2583 _mesa_glsl_compile_shader(GLcontext *ctx, struct gl_shader *shader)
2584 {
2585 struct _mesa_glsl_parse_state *state =
2586 new(shader) _mesa_glsl_parse_state(ctx, shader->Type, shader);
2587
2588 const char *source = shader->Source;
2589 state->error = preprocess(state, &source, &state->info_log,
2590 &ctx->Extensions);
2591
2592 if (!state->error) {
2593 _mesa_glsl_lexer_ctor(state, source);
2594 _mesa_glsl_parse(state);
2595 _mesa_glsl_lexer_dtor(state);
2596 }
2597
2598 shader->ir = new(shader) exec_list;
2599 if (!state->error && !state->translation_unit.is_empty())
2600 _mesa_ast_to_hir(shader->ir, state);
2601
2602 if (!state->error && !shader->ir->is_empty()) {
2603 validate_ir_tree(shader->ir);
2604
2605 /* Lowering */
2606 do_mat_op_to_vec(shader->ir);
2607 do_mod_to_fract(shader->ir);
2608 do_div_to_mul_rcp(shader->ir);
2609
2610 /* Optimization passes */
2611 bool progress;
2612 do {
2613 progress = false;
2614
2615 progress = do_function_inlining(shader->ir) || progress;
2616 progress = do_if_simplification(shader->ir) || progress;
2617 progress = do_copy_propagation(shader->ir) || progress;
2618 progress = do_dead_code_local(shader->ir) || progress;
2619 progress = do_dead_code_unlinked(shader->ir) || progress;
2620 progress = do_tree_grafting(shader->ir) || progress;
2621 progress = do_constant_variable_unlinked(shader->ir) || progress;
2622 progress = do_constant_folding(shader->ir) || progress;
2623 progress = do_algebraic(shader->ir) || progress;
2624 progress = do_if_return(shader->ir) || progress;
2625 if (ctx->Shader.EmitNoIfs)
2626 progress = do_if_to_cond_assign(shader->ir) || progress;
2627
2628 progress = do_vec_index_to_swizzle(shader->ir) || progress;
2629 /* Do this one after the previous to let the easier pass handle
2630 * constant vector indexing.
2631 */
2632 progress = do_vec_index_to_cond_assign(shader->ir) || progress;
2633
2634 progress = do_swizzle_swizzle(shader->ir) || progress;
2635 } while (progress);
2636
2637 validate_ir_tree(shader->ir);
2638 }
2639
2640 shader->symbols = state->symbols;
2641
2642 shader->CompileStatus = !state->error;
2643 shader->InfoLog = state->info_log;
2644 shader->Version = state->language_version;
2645 memcpy(shader->builtins_to_link, state->builtins_to_link,
2646 sizeof(shader->builtins_to_link[0]) * state->num_builtins_to_link);
2647 shader->num_builtins_to_link = state->num_builtins_to_link;
2648
2649 if (ctx->Shader.Flags & GLSL_LOG) {
2650 _mesa_write_shader_to_file(shader);
2651 }
2652
2653 /* Retain any live IR, but trash the rest. */
2654 reparent_ir(shader->ir, shader);
2655
2656 talloc_free(state);
2657 }
2658
2659 void
2660 _mesa_glsl_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
2661 {
2662 unsigned int i;
2663
2664 _mesa_clear_shader_program_data(ctx, prog);
2665
2666 prog->LinkStatus = GL_TRUE;
2667
2668 for (i = 0; i < prog->NumShaders; i++) {
2669 if (!prog->Shaders[i]->CompileStatus) {
2670 prog->InfoLog =
2671 talloc_asprintf_append(prog->InfoLog,
2672 "linking with uncompiled shader");
2673 prog->LinkStatus = GL_FALSE;
2674 }
2675 }
2676
2677 prog->Varying = _mesa_new_parameter_list();
2678 _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL);
2679 _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL);
2680
2681 if (prog->LinkStatus) {
2682 link_shaders(prog);
2683
2684 /* We don't use the linker's uniforms list, and cook up our own at
2685 * generate time.
2686 */
2687 free(prog->Uniforms);
2688 prog->Uniforms = _mesa_new_uniform_list();
2689 }
2690
2691 if (prog->LinkStatus) {
2692 for (i = 0; i < prog->_NumLinkedShaders; i++) {
2693 struct gl_program *linked_prog;
2694 bool ok = true;
2695
2696 linked_prog = get_mesa_program(ctx, prog,
2697 prog->_LinkedShaders[i]);
2698 count_resources(linked_prog);
2699
2700 link_uniforms_to_shared_uniform_list(prog->Uniforms, linked_prog);
2701
2702 switch (prog->_LinkedShaders[i]->Type) {
2703 case GL_VERTEX_SHADER:
2704 _mesa_reference_vertprog(ctx, &prog->VertexProgram,
2705 (struct gl_vertex_program *)linked_prog);
2706 ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB,
2707 linked_prog);
2708 break;
2709 case GL_FRAGMENT_SHADER:
2710 _mesa_reference_fragprog(ctx, &prog->FragmentProgram,
2711 (struct gl_fragment_program *)linked_prog);
2712 ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB,
2713 linked_prog);
2714 break;
2715 }
2716 if (!ok) {
2717 prog->LinkStatus = GL_FALSE;
2718 }
2719 }
2720 }
2721 }
2722
2723 } /* extern "C" */