ir_to_mesa: Support for struct uniforms.
[mesa.git] / src / mesa / program / ir_to_mesa.cpp
1 /*
2 * Copyright (C) 2005-2007 Brian Paul All Rights Reserved.
3 * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
4 * Copyright © 2010 Intel Corporation
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
24 */
25
26 /**
27 * \file ir_to_mesa.cpp
28 *
29 * Translates the GLSL IR into Mesa IR instructions (prog_instruction),
30 * which later passes over the Mesa IR can optimize and register-allocate.
31 */
32
33 #include <stdio.h>
34 #include "ir.h"
35 #include "ir_visitor.h"
36 #include "ir_print_visitor.h"
37 #include "ir_expression_flattening.h"
38 #include "glsl_types.h"
39 #include "glsl_parser_extras.h"
40 #include "../glsl/program.h"
41 #include "ir_optimization.h"
42 #include "ast.h"
43
44 extern "C" {
45 #include "main/mtypes.h"
46 #include "main/shaderobj.h"
47 #include "main/uniforms.h"
48 #include "program/prog_instruction.h"
49 #include "program/prog_optimize.h"
50 #include "program/prog_print.h"
51 #include "program/program.h"
52 #include "program/prog_uniform.h"
53 #include "program/prog_parameter.h"
54 }
55
56 static int swizzle_for_size(int size);
57
58 /**
59 * This struct is a corresponding struct to Mesa prog_src_register, with
60 * wider fields.
61 */
62 typedef struct ir_to_mesa_src_reg {
63 ir_to_mesa_src_reg(int file, int index, const glsl_type *type)
64 {
65 this->file = file;
66 this->index = index;
67 if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
68 this->swizzle = swizzle_for_size(type->vector_elements);
69 else
70 this->swizzle = SWIZZLE_XYZW;
71 this->negate = 0;
72 this->reladdr = NULL;
73 }
74
75 ir_to_mesa_src_reg()
76 {
77 this->file = PROGRAM_UNDEFINED;
78 }
79
80 int file; /**< PROGRAM_* from Mesa */
81 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
82 GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
83 int negate; /**< NEGATE_XYZW mask from mesa */
84 /** Register index should be offset by the integer in this reg. */
85 ir_to_mesa_src_reg *reladdr;
86 } ir_to_mesa_src_reg;
87
88 typedef struct ir_to_mesa_dst_reg {
89 int file; /**< PROGRAM_* from Mesa */
90 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
91 int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
92 GLuint cond_mask:4;
93 /** Register index should be offset by the integer in this reg. */
94 ir_to_mesa_src_reg *reladdr;
95 } ir_to_mesa_dst_reg;
96
97 extern ir_to_mesa_src_reg ir_to_mesa_undef;
98
99 class ir_to_mesa_instruction : public exec_node {
100 public:
101 enum prog_opcode op;
102 ir_to_mesa_dst_reg dst_reg;
103 ir_to_mesa_src_reg src_reg[3];
104 /** Pointer to the ir source this tree came from for debugging */
105 ir_instruction *ir;
106 GLboolean cond_update;
107 int sampler; /**< sampler index */
108 int tex_target; /**< One of TEXTURE_*_INDEX */
109 GLboolean tex_shadow;
110
111 class function_entry *function; /* Set on OPCODE_CAL or OPCODE_BGNSUB */
112 };
113
114 class variable_storage : public exec_node {
115 public:
116 variable_storage(ir_variable *var, int file, int index)
117 : file(file), index(index), var(var)
118 {
119 /* empty */
120 }
121
122 int file;
123 int index;
124 ir_variable *var; /* variable that maps to this, if any */
125 };
126
127 class function_entry : public exec_node {
128 public:
129 ir_function_signature *sig;
130
131 /**
132 * identifier of this function signature used by the program.
133 *
134 * At the point that Mesa instructions for function calls are
135 * generated, we don't know the address of the first instruction of
136 * the function body. So we make the BranchTarget that is called a
137 * small integer and rewrite them during set_branchtargets().
138 */
139 int sig_id;
140
141 /**
142 * Pointer to first instruction of the function body.
143 *
144 * Set during function body emits after main() is processed.
145 */
146 ir_to_mesa_instruction *bgn_inst;
147
148 /**
149 * Index of the first instruction of the function body in actual
150 * Mesa IR.
151 *
152 * Set after convertion from ir_to_mesa_instruction to prog_instruction.
153 */
154 int inst;
155
156 /** Storage for the return value. */
157 ir_to_mesa_src_reg return_reg;
158 };
159
160 class ir_to_mesa_visitor : public ir_visitor {
161 public:
162 ir_to_mesa_visitor();
163
164 function_entry *current_function;
165
166 GLcontext *ctx;
167 struct gl_program *prog;
168
169 int next_temp;
170
171 variable_storage *find_variable_storage(ir_variable *var);
172
173 function_entry *get_function_signature(ir_function_signature *sig);
174
175 ir_to_mesa_src_reg get_temp(const glsl_type *type);
176 void reladdr_to_temp(ir_instruction *ir,
177 ir_to_mesa_src_reg *reg, int *num_reladdr);
178
179 struct ir_to_mesa_src_reg src_reg_for_float(float val);
180
181 /**
182 * \name Visit methods
183 *
184 * As typical for the visitor pattern, there must be one \c visit method for
185 * each concrete subclass of \c ir_instruction. Virtual base classes within
186 * the hierarchy should not have \c visit methods.
187 */
188 /*@{*/
189 virtual void visit(ir_variable *);
190 virtual void visit(ir_loop *);
191 virtual void visit(ir_loop_jump *);
192 virtual void visit(ir_function_signature *);
193 virtual void visit(ir_function *);
194 virtual void visit(ir_expression *);
195 virtual void visit(ir_swizzle *);
196 virtual void visit(ir_dereference_variable *);
197 virtual void visit(ir_dereference_array *);
198 virtual void visit(ir_dereference_record *);
199 virtual void visit(ir_assignment *);
200 virtual void visit(ir_constant *);
201 virtual void visit(ir_call *);
202 virtual void visit(ir_return *);
203 virtual void visit(ir_discard *);
204 virtual void visit(ir_texture *);
205 virtual void visit(ir_if *);
206 /*@}*/
207
208 struct ir_to_mesa_src_reg result;
209
210 /** List of variable_storage */
211 exec_list variables;
212
213 /** List of function_entry */
214 exec_list function_signatures;
215 int next_signature_id;
216
217 /** List of ir_to_mesa_instruction */
218 exec_list instructions;
219
220 ir_to_mesa_instruction *ir_to_mesa_emit_op0(ir_instruction *ir,
221 enum prog_opcode op);
222
223 ir_to_mesa_instruction *ir_to_mesa_emit_op1(ir_instruction *ir,
224 enum prog_opcode op,
225 ir_to_mesa_dst_reg dst,
226 ir_to_mesa_src_reg src0);
227
228 ir_to_mesa_instruction *ir_to_mesa_emit_op2(ir_instruction *ir,
229 enum prog_opcode op,
230 ir_to_mesa_dst_reg dst,
231 ir_to_mesa_src_reg src0,
232 ir_to_mesa_src_reg src1);
233
234 ir_to_mesa_instruction *ir_to_mesa_emit_op3(ir_instruction *ir,
235 enum prog_opcode op,
236 ir_to_mesa_dst_reg dst,
237 ir_to_mesa_src_reg src0,
238 ir_to_mesa_src_reg src1,
239 ir_to_mesa_src_reg src2);
240
241 void ir_to_mesa_emit_scalar_op1(ir_instruction *ir,
242 enum prog_opcode op,
243 ir_to_mesa_dst_reg dst,
244 ir_to_mesa_src_reg src0);
245
246 void ir_to_mesa_emit_scalar_op2(ir_instruction *ir,
247 enum prog_opcode op,
248 ir_to_mesa_dst_reg dst,
249 ir_to_mesa_src_reg src0,
250 ir_to_mesa_src_reg src1);
251
252 GLboolean try_emit_mad(ir_expression *ir,
253 int mul_operand);
254
255 void add_aggregate_uniform(ir_instruction *ir,
256 const char *name,
257 const struct glsl_type *type,
258 struct ir_to_mesa_dst_reg temp);
259
260 int *sampler_map;
261 int sampler_map_size;
262
263 void map_sampler(int location, int sampler);
264 int get_sampler_number(int location);
265
266 void *mem_ctx;
267 };
268
269 ir_to_mesa_src_reg ir_to_mesa_undef = ir_to_mesa_src_reg(PROGRAM_UNDEFINED, 0, NULL);
270
271 ir_to_mesa_dst_reg ir_to_mesa_undef_dst = {
272 PROGRAM_UNDEFINED, 0, SWIZZLE_NOOP, COND_TR, NULL,
273 };
274
275 ir_to_mesa_dst_reg ir_to_mesa_address_reg = {
276 PROGRAM_ADDRESS, 0, WRITEMASK_X, COND_TR, NULL
277 };
278
279 static int swizzle_for_size(int size)
280 {
281 int size_swizzles[4] = {
282 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
283 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
284 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
285 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
286 };
287
288 return size_swizzles[size - 1];
289 }
290
291 ir_to_mesa_instruction *
292 ir_to_mesa_visitor::ir_to_mesa_emit_op3(ir_instruction *ir,
293 enum prog_opcode op,
294 ir_to_mesa_dst_reg dst,
295 ir_to_mesa_src_reg src0,
296 ir_to_mesa_src_reg src1,
297 ir_to_mesa_src_reg src2)
298 {
299 ir_to_mesa_instruction *inst = new(mem_ctx) ir_to_mesa_instruction();
300 int num_reladdr = 0;
301
302 /* If we have to do relative addressing, we want to load the ARL
303 * reg directly for one of the regs, and preload the other reladdr
304 * sources into temps.
305 */
306 num_reladdr += dst.reladdr != NULL;
307 num_reladdr += src0.reladdr != NULL;
308 num_reladdr += src1.reladdr != NULL;
309 num_reladdr += src2.reladdr != NULL;
310
311 reladdr_to_temp(ir, &src2, &num_reladdr);
312 reladdr_to_temp(ir, &src1, &num_reladdr);
313 reladdr_to_temp(ir, &src0, &num_reladdr);
314
315 if (dst.reladdr) {
316 ir_to_mesa_emit_op1(ir, OPCODE_ARL, ir_to_mesa_address_reg,
317 *dst.reladdr);
318
319 num_reladdr--;
320 }
321 assert(num_reladdr == 0);
322
323 inst->op = op;
324 inst->dst_reg = dst;
325 inst->src_reg[0] = src0;
326 inst->src_reg[1] = src1;
327 inst->src_reg[2] = src2;
328 inst->ir = ir;
329
330 inst->function = NULL;
331
332 this->instructions.push_tail(inst);
333
334 return inst;
335 }
336
337
338 ir_to_mesa_instruction *
339 ir_to_mesa_visitor::ir_to_mesa_emit_op2(ir_instruction *ir,
340 enum prog_opcode op,
341 ir_to_mesa_dst_reg dst,
342 ir_to_mesa_src_reg src0,
343 ir_to_mesa_src_reg src1)
344 {
345 return ir_to_mesa_emit_op3(ir, op, dst, src0, src1, ir_to_mesa_undef);
346 }
347
348 ir_to_mesa_instruction *
349 ir_to_mesa_visitor::ir_to_mesa_emit_op1(ir_instruction *ir,
350 enum prog_opcode op,
351 ir_to_mesa_dst_reg dst,
352 ir_to_mesa_src_reg src0)
353 {
354 return ir_to_mesa_emit_op3(ir, op, dst,
355 src0, ir_to_mesa_undef, ir_to_mesa_undef);
356 }
357
358 ir_to_mesa_instruction *
359 ir_to_mesa_visitor::ir_to_mesa_emit_op0(ir_instruction *ir,
360 enum prog_opcode op)
361 {
362 return ir_to_mesa_emit_op3(ir, op, ir_to_mesa_undef_dst,
363 ir_to_mesa_undef,
364 ir_to_mesa_undef,
365 ir_to_mesa_undef);
366 }
367
368 void
369 ir_to_mesa_visitor::map_sampler(int location, int sampler)
370 {
371 if (this->sampler_map_size <= location) {
372 this->sampler_map = talloc_realloc(this->mem_ctx, this->sampler_map,
373 int, location + 1);
374 this->sampler_map_size = location + 1;
375 }
376
377 this->sampler_map[location] = sampler;
378 }
379
380 int
381 ir_to_mesa_visitor::get_sampler_number(int location)
382 {
383 assert(location < this->sampler_map_size);
384 return this->sampler_map[location];
385 }
386
387 inline ir_to_mesa_dst_reg
388 ir_to_mesa_dst_reg_from_src(ir_to_mesa_src_reg reg)
389 {
390 ir_to_mesa_dst_reg dst_reg;
391
392 dst_reg.file = reg.file;
393 dst_reg.index = reg.index;
394 dst_reg.writemask = WRITEMASK_XYZW;
395 dst_reg.cond_mask = COND_TR;
396 dst_reg.reladdr = reg.reladdr;
397
398 return dst_reg;
399 }
400
401 inline ir_to_mesa_src_reg
402 ir_to_mesa_src_reg_from_dst(ir_to_mesa_dst_reg reg)
403 {
404 return ir_to_mesa_src_reg(reg.file, reg.index, NULL);
405 }
406
407 /**
408 * Emits Mesa scalar opcodes to produce unique answers across channels.
409 *
410 * Some Mesa opcodes are scalar-only, like ARB_fp/vp. The src X
411 * channel determines the result across all channels. So to do a vec4
412 * of this operation, we want to emit a scalar per source channel used
413 * to produce dest channels.
414 */
415 void
416 ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op2(ir_instruction *ir,
417 enum prog_opcode op,
418 ir_to_mesa_dst_reg dst,
419 ir_to_mesa_src_reg orig_src0,
420 ir_to_mesa_src_reg orig_src1)
421 {
422 int i, j;
423 int done_mask = ~dst.writemask;
424
425 /* Mesa RCP is a scalar operation splatting results to all channels,
426 * like ARB_fp/vp. So emit as many RCPs as necessary to cover our
427 * dst channels.
428 */
429 for (i = 0; i < 4; i++) {
430 GLuint this_mask = (1 << i);
431 ir_to_mesa_instruction *inst;
432 ir_to_mesa_src_reg src0 = orig_src0;
433 ir_to_mesa_src_reg src1 = orig_src1;
434
435 if (done_mask & this_mask)
436 continue;
437
438 GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
439 GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
440 for (j = i + 1; j < 4; j++) {
441 if (!(done_mask & (1 << j)) &&
442 GET_SWZ(src0.swizzle, j) == src0_swiz &&
443 GET_SWZ(src1.swizzle, j) == src1_swiz) {
444 this_mask |= (1 << j);
445 }
446 }
447 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
448 src0_swiz, src0_swiz);
449 src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
450 src1_swiz, src1_swiz);
451
452 inst = ir_to_mesa_emit_op2(ir, op,
453 dst,
454 src0,
455 src1);
456 inst->dst_reg.writemask = this_mask;
457 done_mask |= this_mask;
458 }
459 }
460
461 void
462 ir_to_mesa_visitor::ir_to_mesa_emit_scalar_op1(ir_instruction *ir,
463 enum prog_opcode op,
464 ir_to_mesa_dst_reg dst,
465 ir_to_mesa_src_reg src0)
466 {
467 ir_to_mesa_src_reg undef = ir_to_mesa_undef;
468
469 undef.swizzle = SWIZZLE_XXXX;
470
471 ir_to_mesa_emit_scalar_op2(ir, op, dst, src0, undef);
472 }
473
474 struct ir_to_mesa_src_reg
475 ir_to_mesa_visitor::src_reg_for_float(float val)
476 {
477 ir_to_mesa_src_reg src_reg(PROGRAM_CONSTANT, -1, NULL);
478
479 src_reg.index = _mesa_add_unnamed_constant(this->prog->Parameters,
480 &val, 1, &src_reg.swizzle);
481
482 return src_reg;
483 }
484
485 static int
486 type_size(const struct glsl_type *type)
487 {
488 unsigned int i;
489 int size;
490
491 switch (type->base_type) {
492 case GLSL_TYPE_UINT:
493 case GLSL_TYPE_INT:
494 case GLSL_TYPE_FLOAT:
495 case GLSL_TYPE_BOOL:
496 if (type->is_matrix()) {
497 return type->matrix_columns;
498 } else {
499 /* Regardless of size of vector, it gets a vec4. This is bad
500 * packing for things like floats, but otherwise arrays become a
501 * mess. Hopefully a later pass over the code can pack scalars
502 * down if appropriate.
503 */
504 return 1;
505 }
506 case GLSL_TYPE_ARRAY:
507 return type_size(type->fields.array) * type->length;
508 case GLSL_TYPE_STRUCT:
509 size = 0;
510 for (i = 0; i < type->length; i++) {
511 size += type_size(type->fields.structure[i].type);
512 }
513 return size;
514 default:
515 assert(0);
516 }
517 }
518
519 /**
520 * In the initial pass of codegen, we assign temporary numbers to
521 * intermediate results. (not SSA -- variable assignments will reuse
522 * storage). Actual register allocation for the Mesa VM occurs in a
523 * pass over the Mesa IR later.
524 */
525 ir_to_mesa_src_reg
526 ir_to_mesa_visitor::get_temp(const glsl_type *type)
527 {
528 ir_to_mesa_src_reg src_reg;
529 int swizzle[4];
530 int i;
531
532 src_reg.file = PROGRAM_TEMPORARY;
533 src_reg.index = next_temp;
534 src_reg.reladdr = NULL;
535 next_temp += type_size(type);
536
537 if (type->is_array() || type->is_record()) {
538 src_reg.swizzle = SWIZZLE_NOOP;
539 } else {
540 for (i = 0; i < type->vector_elements; i++)
541 swizzle[i] = i;
542 for (; i < 4; i++)
543 swizzle[i] = type->vector_elements - 1;
544 src_reg.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1],
545 swizzle[2], swizzle[3]);
546 }
547 src_reg.negate = 0;
548
549 return src_reg;
550 }
551
552 variable_storage *
553 ir_to_mesa_visitor::find_variable_storage(ir_variable *var)
554 {
555
556 variable_storage *entry;
557
558 foreach_iter(exec_list_iterator, iter, this->variables) {
559 entry = (variable_storage *)iter.get();
560
561 if (entry->var == var)
562 return entry;
563 }
564
565 return NULL;
566 }
567
568 void
569 ir_to_mesa_visitor::visit(ir_variable *ir)
570 {
571 if (strcmp(ir->name, "gl_FragCoord") == 0) {
572 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
573
574 fp->OriginUpperLeft = ir->origin_upper_left;
575 fp->PixelCenterInteger = ir->pixel_center_integer;
576 }
577 }
578
579 void
580 ir_to_mesa_visitor::visit(ir_loop *ir)
581 {
582 assert(!ir->from);
583 assert(!ir->to);
584 assert(!ir->increment);
585 assert(!ir->counter);
586
587 ir_to_mesa_emit_op0(NULL, OPCODE_BGNLOOP);
588 visit_exec_list(&ir->body_instructions, this);
589 ir_to_mesa_emit_op0(NULL, OPCODE_ENDLOOP);
590 }
591
592 void
593 ir_to_mesa_visitor::visit(ir_loop_jump *ir)
594 {
595 switch (ir->mode) {
596 case ir_loop_jump::jump_break:
597 ir_to_mesa_emit_op0(NULL, OPCODE_BRK);
598 break;
599 case ir_loop_jump::jump_continue:
600 ir_to_mesa_emit_op0(NULL, OPCODE_CONT);
601 break;
602 }
603 }
604
605
606 void
607 ir_to_mesa_visitor::visit(ir_function_signature *ir)
608 {
609 assert(0);
610 (void)ir;
611 }
612
613 void
614 ir_to_mesa_visitor::visit(ir_function *ir)
615 {
616 /* Ignore function bodies other than main() -- we shouldn't see calls to
617 * them since they should all be inlined before we get to ir_to_mesa.
618 */
619 if (strcmp(ir->name, "main") == 0) {
620 const ir_function_signature *sig;
621 exec_list empty;
622
623 sig = ir->matching_signature(&empty);
624
625 assert(sig);
626
627 foreach_iter(exec_list_iterator, iter, sig->body) {
628 ir_instruction *ir = (ir_instruction *)iter.get();
629
630 ir->accept(this);
631 }
632 }
633 }
634
635 GLboolean
636 ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
637 {
638 int nonmul_operand = 1 - mul_operand;
639 ir_to_mesa_src_reg a, b, c;
640
641 ir_expression *expr = ir->operands[mul_operand]->as_expression();
642 if (!expr || expr->operation != ir_binop_mul)
643 return false;
644
645 expr->operands[0]->accept(this);
646 a = this->result;
647 expr->operands[1]->accept(this);
648 b = this->result;
649 ir->operands[nonmul_operand]->accept(this);
650 c = this->result;
651
652 this->result = get_temp(ir->type);
653 ir_to_mesa_emit_op3(ir, OPCODE_MAD,
654 ir_to_mesa_dst_reg_from_src(this->result), a, b, c);
655
656 return true;
657 }
658
659 void
660 ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir,
661 ir_to_mesa_src_reg *reg, int *num_reladdr)
662 {
663 if (!reg->reladdr)
664 return;
665
666 ir_to_mesa_emit_op1(ir, OPCODE_ARL, ir_to_mesa_address_reg, *reg->reladdr);
667
668 if (*num_reladdr != 1) {
669 ir_to_mesa_src_reg temp = get_temp(glsl_type::vec4_type);
670
671 ir_to_mesa_emit_op1(ir, OPCODE_MOV,
672 ir_to_mesa_dst_reg_from_src(temp), *reg);
673 *reg = temp;
674 }
675
676 (*num_reladdr)--;
677 }
678
679 void
680 ir_to_mesa_visitor::visit(ir_expression *ir)
681 {
682 unsigned int operand;
683 struct ir_to_mesa_src_reg op[2];
684 struct ir_to_mesa_src_reg result_src;
685 struct ir_to_mesa_dst_reg result_dst;
686 const glsl_type *vec4_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 4, 1);
687 const glsl_type *vec3_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 3, 1);
688 const glsl_type *vec2_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, 2, 1);
689
690 /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c)
691 */
692 if (ir->operation == ir_binop_add) {
693 if (try_emit_mad(ir, 1))
694 return;
695 if (try_emit_mad(ir, 0))
696 return;
697 }
698
699 for (operand = 0; operand < ir->get_num_operands(); operand++) {
700 this->result.file = PROGRAM_UNDEFINED;
701 ir->operands[operand]->accept(this);
702 if (this->result.file == PROGRAM_UNDEFINED) {
703 ir_print_visitor v;
704 printf("Failed to get tree for expression operand:\n");
705 ir->operands[operand]->accept(&v);
706 exit(1);
707 }
708 op[operand] = this->result;
709
710 /* Matrix expression operands should have been broken down to vector
711 * operations already.
712 */
713 assert(!ir->operands[operand]->type->is_matrix());
714 }
715
716 this->result.file = PROGRAM_UNDEFINED;
717
718 /* Storage for our result. Ideally for an assignment we'd be using
719 * the actual storage for the result here, instead.
720 */
721 result_src = get_temp(ir->type);
722 /* convenience for the emit functions below. */
723 result_dst = ir_to_mesa_dst_reg_from_src(result_src);
724 /* Limit writes to the channels that will be used by result_src later.
725 * This does limit this temp's use as a temporary for multi-instruction
726 * sequences.
727 */
728 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
729
730 switch (ir->operation) {
731 case ir_unop_logic_not:
732 ir_to_mesa_emit_op2(ir, OPCODE_SEQ, result_dst,
733 op[0], src_reg_for_float(0.0));
734 break;
735 case ir_unop_neg:
736 op[0].negate = ~op[0].negate;
737 result_src = op[0];
738 break;
739 case ir_unop_abs:
740 ir_to_mesa_emit_op1(ir, OPCODE_ABS, result_dst, op[0]);
741 break;
742 case ir_unop_sign:
743 ir_to_mesa_emit_op1(ir, OPCODE_SSG, result_dst, op[0]);
744 break;
745 case ir_unop_rcp:
746 ir_to_mesa_emit_scalar_op1(ir, OPCODE_RCP, result_dst, op[0]);
747 break;
748
749 case ir_unop_exp:
750 ir_to_mesa_emit_scalar_op2(ir, OPCODE_POW, result_dst,
751 src_reg_for_float(M_E), op[0]);
752 break;
753 case ir_unop_exp2:
754 ir_to_mesa_emit_scalar_op1(ir, OPCODE_EX2, result_dst, op[0]);
755 break;
756 case ir_unop_log:
757 ir_to_mesa_emit_scalar_op1(ir, OPCODE_LOG, result_dst, op[0]);
758 break;
759 case ir_unop_log2:
760 ir_to_mesa_emit_scalar_op1(ir, OPCODE_LG2, result_dst, op[0]);
761 break;
762 case ir_unop_sin:
763 ir_to_mesa_emit_scalar_op1(ir, OPCODE_SIN, result_dst, op[0]);
764 break;
765 case ir_unop_cos:
766 ir_to_mesa_emit_scalar_op1(ir, OPCODE_COS, result_dst, op[0]);
767 break;
768
769 case ir_unop_dFdx:
770 ir_to_mesa_emit_op1(ir, OPCODE_DDX, result_dst, op[0]);
771 break;
772 case ir_unop_dFdy:
773 ir_to_mesa_emit_op1(ir, OPCODE_DDY, result_dst, op[0]);
774 break;
775
776 case ir_binop_add:
777 ir_to_mesa_emit_op2(ir, OPCODE_ADD, result_dst, op[0], op[1]);
778 break;
779 case ir_binop_sub:
780 ir_to_mesa_emit_op2(ir, OPCODE_SUB, result_dst, op[0], op[1]);
781 break;
782
783 case ir_binop_mul:
784 ir_to_mesa_emit_op2(ir, OPCODE_MUL, result_dst, op[0], op[1]);
785 break;
786 case ir_binop_div:
787 assert(!"not reached: should be handled by ir_div_to_mul_rcp");
788 case ir_binop_mod:
789 assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
790 break;
791
792 case ir_binop_less:
793 ir_to_mesa_emit_op2(ir, OPCODE_SLT, result_dst, op[0], op[1]);
794 break;
795 case ir_binop_greater:
796 ir_to_mesa_emit_op2(ir, OPCODE_SGT, result_dst, op[0], op[1]);
797 break;
798 case ir_binop_lequal:
799 ir_to_mesa_emit_op2(ir, OPCODE_SLE, result_dst, op[0], op[1]);
800 break;
801 case ir_binop_gequal:
802 ir_to_mesa_emit_op2(ir, OPCODE_SGE, result_dst, op[0], op[1]);
803 break;
804 case ir_binop_equal:
805 ir_to_mesa_emit_op2(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
806 break;
807 case ir_binop_logic_xor:
808 case ir_binop_nequal:
809 ir_to_mesa_emit_op2(ir, OPCODE_SNE, result_dst, op[0], op[1]);
810 break;
811
812 case ir_binop_logic_or:
813 /* This could be a saturated add and skip the SNE. */
814 ir_to_mesa_emit_op2(ir, OPCODE_ADD,
815 result_dst,
816 op[0], op[1]);
817
818 ir_to_mesa_emit_op2(ir, OPCODE_SNE,
819 result_dst,
820 result_src, src_reg_for_float(0.0));
821 break;
822
823 case ir_binop_logic_and:
824 /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
825 ir_to_mesa_emit_op2(ir, OPCODE_MUL,
826 result_dst,
827 op[0], op[1]);
828 break;
829
830 case ir_binop_dot:
831 if (ir->operands[0]->type == vec4_type) {
832 assert(ir->operands[1]->type == vec4_type);
833 ir_to_mesa_emit_op2(ir, OPCODE_DP4,
834 result_dst,
835 op[0], op[1]);
836 } else if (ir->operands[0]->type == vec3_type) {
837 assert(ir->operands[1]->type == vec3_type);
838 ir_to_mesa_emit_op2(ir, OPCODE_DP3,
839 result_dst,
840 op[0], op[1]);
841 } else if (ir->operands[0]->type == vec2_type) {
842 assert(ir->operands[1]->type == vec2_type);
843 ir_to_mesa_emit_op2(ir, OPCODE_DP2,
844 result_dst,
845 op[0], op[1]);
846 }
847 break;
848
849 case ir_binop_cross:
850 ir_to_mesa_emit_op2(ir, OPCODE_XPD, result_dst, op[0], op[1]);
851 break;
852
853 case ir_unop_sqrt:
854 ir_to_mesa_emit_scalar_op1(ir, OPCODE_RSQ, result_dst, op[0]);
855 ir_to_mesa_emit_scalar_op1(ir, OPCODE_RCP, result_dst, result_src);
856 /* For incoming channels < 0, set the result to 0. */
857 ir_to_mesa_emit_op3(ir, OPCODE_CMP, result_dst,
858 op[0], src_reg_for_float(0.0), result_src);
859 break;
860 case ir_unop_rsq:
861 ir_to_mesa_emit_scalar_op1(ir, OPCODE_RSQ, result_dst, op[0]);
862 break;
863 case ir_unop_i2f:
864 case ir_unop_b2f:
865 case ir_unop_b2i:
866 /* Mesa IR lacks types, ints are stored as truncated floats. */
867 result_src = op[0];
868 break;
869 case ir_unop_f2i:
870 ir_to_mesa_emit_op1(ir, OPCODE_TRUNC, result_dst, op[0]);
871 break;
872 case ir_unop_f2b:
873 case ir_unop_i2b:
874 ir_to_mesa_emit_op2(ir, OPCODE_SNE, result_dst,
875 result_src, src_reg_for_float(0.0));
876 break;
877 case ir_unop_trunc:
878 ir_to_mesa_emit_op1(ir, OPCODE_TRUNC, result_dst, op[0]);
879 break;
880 case ir_unop_ceil:
881 op[0].negate = ~op[0].negate;
882 ir_to_mesa_emit_op1(ir, OPCODE_FLR, result_dst, op[0]);
883 result_src.negate = ~result_src.negate;
884 break;
885 case ir_unop_floor:
886 ir_to_mesa_emit_op1(ir, OPCODE_FLR, result_dst, op[0]);
887 break;
888 case ir_unop_fract:
889 ir_to_mesa_emit_op1(ir, OPCODE_FRC, result_dst, op[0]);
890 break;
891
892 case ir_binop_min:
893 ir_to_mesa_emit_op2(ir, OPCODE_MIN, result_dst, op[0], op[1]);
894 break;
895 case ir_binop_max:
896 ir_to_mesa_emit_op2(ir, OPCODE_MAX, result_dst, op[0], op[1]);
897 break;
898 case ir_binop_pow:
899 ir_to_mesa_emit_scalar_op2(ir, OPCODE_POW, result_dst, op[0], op[1]);
900 break;
901
902 case ir_unop_bit_not:
903 case ir_unop_u2f:
904 case ir_binop_lshift:
905 case ir_binop_rshift:
906 case ir_binop_bit_and:
907 case ir_binop_bit_xor:
908 case ir_binop_bit_or:
909 assert(!"GLSL 1.30 features unsupported");
910 break;
911 }
912
913 this->result = result_src;
914 }
915
916
917 void
918 ir_to_mesa_visitor::visit(ir_swizzle *ir)
919 {
920 ir_to_mesa_src_reg src_reg;
921 int i;
922 int swizzle[4];
923
924 /* Note that this is only swizzles in expressions, not those on the left
925 * hand side of an assignment, which do write masking. See ir_assignment
926 * for that.
927 */
928
929 ir->val->accept(this);
930 src_reg = this->result;
931 assert(src_reg.file != PROGRAM_UNDEFINED);
932
933 for (i = 0; i < 4; i++) {
934 if (i < ir->type->vector_elements) {
935 switch (i) {
936 case 0:
937 swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.x);
938 break;
939 case 1:
940 swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.y);
941 break;
942 case 2:
943 swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.z);
944 break;
945 case 3:
946 swizzle[i] = GET_SWZ(src_reg.swizzle, ir->mask.w);
947 break;
948 }
949 } else {
950 /* If the type is smaller than a vec4, replicate the last
951 * channel out.
952 */
953 swizzle[i] = swizzle[ir->type->vector_elements - 1];
954 }
955 }
956
957 src_reg.swizzle = MAKE_SWIZZLE4(swizzle[0],
958 swizzle[1],
959 swizzle[2],
960 swizzle[3]);
961
962 this->result = src_reg;
963 }
964
965 static const struct {
966 const char *name;
967 const char *field;
968 int tokens[STATE_LENGTH];
969 int swizzle;
970 bool array_indexed;
971 } statevars[] = {
972 {"gl_DepthRange", "near",
973 {STATE_DEPTH_RANGE, 0, 0}, SWIZZLE_XXXX},
974 {"gl_DepthRange", "far",
975 {STATE_DEPTH_RANGE, 0, 0}, SWIZZLE_YYYY},
976 {"gl_DepthRange", "diff",
977 {STATE_DEPTH_RANGE, 0, 0}, SWIZZLE_ZZZZ},
978
979 {"gl_ClipPlane", NULL,
980 {STATE_CLIPPLANE, 0, 0}, SWIZZLE_XYZW, true}
981 ,
982 {"gl_Point", "size",
983 {STATE_POINT_SIZE}, SWIZZLE_XXXX},
984 {"gl_Point", "sizeMin",
985 {STATE_POINT_SIZE}, SWIZZLE_YYYY},
986 {"gl_Point", "sizeMax",
987 {STATE_POINT_SIZE}, SWIZZLE_ZZZZ},
988 {"gl_Point", "fadeThresholdSize",
989 {STATE_POINT_SIZE}, SWIZZLE_WWWW},
990 {"gl_Point", "distanceConstantAttenuation",
991 {STATE_POINT_ATTENUATION}, SWIZZLE_XXXX},
992 {"gl_Point", "distanceLinearAttenuation",
993 {STATE_POINT_ATTENUATION}, SWIZZLE_YYYY},
994 {"gl_Point", "distanceQuadraticAttenuation",
995 {STATE_POINT_ATTENUATION}, SWIZZLE_ZZZZ},
996
997 {"gl_FrontMaterial", "emission",
998 {STATE_MATERIAL, 0, STATE_EMISSION}, SWIZZLE_XYZW},
999 {"gl_FrontMaterial", "ambient",
1000 {STATE_MATERIAL, 0, STATE_AMBIENT}, SWIZZLE_XYZW},
1001 {"gl_FrontMaterial", "diffuse",
1002 {STATE_MATERIAL, 0, STATE_DIFFUSE}, SWIZZLE_XYZW},
1003 {"gl_FrontMaterial", "specular",
1004 {STATE_MATERIAL, 0, STATE_SPECULAR}, SWIZZLE_XYZW},
1005 {"gl_FrontMaterial", "shininess",
1006 {STATE_MATERIAL, 0, STATE_SHININESS}, SWIZZLE_XXXX},
1007
1008 {"gl_BackMaterial", "emission",
1009 {STATE_MATERIAL, 1, STATE_EMISSION}, SWIZZLE_XYZW},
1010 {"gl_BackMaterial", "ambient",
1011 {STATE_MATERIAL, 1, STATE_AMBIENT}, SWIZZLE_XYZW},
1012 {"gl_BackMaterial", "diffuse",
1013 {STATE_MATERIAL, 1, STATE_DIFFUSE}, SWIZZLE_XYZW},
1014 {"gl_BackMaterial", "specular",
1015 {STATE_MATERIAL, 1, STATE_SPECULAR}, SWIZZLE_XYZW},
1016 {"gl_BackMaterial", "shininess",
1017 {STATE_MATERIAL, 1, STATE_SHININESS}, SWIZZLE_XXXX},
1018
1019 {"gl_LightSource", "ambient",
1020 {STATE_LIGHT, 0, STATE_AMBIENT}, SWIZZLE_XYZW, true},
1021 {"gl_LightSource", "diffuse",
1022 {STATE_LIGHT, 0, STATE_DIFFUSE}, SWIZZLE_XYZW, true},
1023 {"gl_LightSource", "specular",
1024 {STATE_LIGHT, 0, STATE_SPECULAR}, SWIZZLE_XYZW, true},
1025 {"gl_LightSource", "position",
1026 {STATE_LIGHT, 0, STATE_POSITION}, SWIZZLE_XYZW, true},
1027 {"gl_LightSource", "halfVector",
1028 {STATE_LIGHT, 0, STATE_HALF_VECTOR}, SWIZZLE_XYZW, true},
1029 {"gl_LightSource", "spotDirection",
1030 {STATE_LIGHT, 0, STATE_SPOT_DIRECTION}, SWIZZLE_XYZW, true},
1031 {"gl_LightSource", "spotCosCutoff",
1032 {STATE_LIGHT, 0, STATE_SPOT_DIRECTION}, SWIZZLE_WWWW, true},
1033 {"gl_LightSource", "spotCutoff",
1034 {STATE_LIGHT, 0, STATE_SPOT_CUTOFF}, SWIZZLE_XXXX, true},
1035 {"gl_LightSource", "spotExponent",
1036 {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_WWWW, true},
1037 {"gl_LightSource", "constantAttenuation",
1038 {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_XXXX, true},
1039 {"gl_LightSource", "linearAttenuation",
1040 {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_YYYY, true},
1041 {"gl_LightSource", "quadraticAttenuation",
1042 {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_ZZZZ, true},
1043
1044 {"gl_LightModel", NULL,
1045 {STATE_LIGHTMODEL_AMBIENT, 0}, SWIZZLE_XYZW},
1046
1047 {"gl_FrontLightModelProduct", NULL,
1048 {STATE_LIGHTMODEL_SCENECOLOR, 0}, SWIZZLE_XYZW},
1049 {"gl_BackLightModelProduct", NULL,
1050 {STATE_LIGHTMODEL_SCENECOLOR, 1}, SWIZZLE_XYZW},
1051
1052 {"gl_FrontLightProduct", "ambient",
1053 {STATE_LIGHTPROD, 0, 0, STATE_AMBIENT}, SWIZZLE_XYZW, true},
1054 {"gl_FrontLightProduct", "diffuse",
1055 {STATE_LIGHTPROD, 0, 0, STATE_DIFFUSE}, SWIZZLE_XYZW, true},
1056 {"gl_FrontLightProduct", "specular",
1057 {STATE_LIGHTPROD, 0, 0, STATE_SPECULAR}, SWIZZLE_XYZW, true},
1058
1059 {"gl_BackLightProduct", "ambient",
1060 {STATE_LIGHTPROD, 0, 1, STATE_AMBIENT}, SWIZZLE_XYZW, true},
1061 {"gl_BackLightProduct", "diffuse",
1062 {STATE_LIGHTPROD, 0, 1, STATE_DIFFUSE}, SWIZZLE_XYZW, true},
1063 {"gl_BackLightProduct", "specular",
1064 {STATE_LIGHTPROD, 0, 1, STATE_SPECULAR}, SWIZZLE_XYZW, true},
1065
1066 {"gl_TextureEnvColor", "ambient",
1067 {STATE_TEXENV_COLOR, 0}, SWIZZLE_XYZW, true},
1068
1069 {"gl_EyePlaneS", NULL,
1070 {STATE_TEXGEN, 0, STATE_TEXGEN_EYE_S}, SWIZZLE_XYZW, true},
1071 {"gl_EyePlaneT", NULL,
1072 {STATE_TEXGEN, 0, STATE_TEXGEN_EYE_T}, SWIZZLE_XYZW, true},
1073 {"gl_EyePlaneR", NULL,
1074 {STATE_TEXGEN, 0, STATE_TEXGEN_EYE_R}, SWIZZLE_XYZW, true},
1075 {"gl_EyePlaneQ", NULL,
1076 {STATE_TEXGEN, 0, STATE_TEXGEN_EYE_Q}, SWIZZLE_XYZW, true},
1077
1078 {"gl_ObjectPlaneS", NULL,
1079 {STATE_TEXGEN, 0, STATE_TEXGEN_OBJECT_S}, SWIZZLE_XYZW, true},
1080 {"gl_ObjectPlaneT", NULL,
1081 {STATE_TEXGEN, 0, STATE_TEXGEN_OBJECT_T}, SWIZZLE_XYZW, true},
1082 {"gl_ObjectPlaneR", NULL,
1083 {STATE_TEXGEN, 0, STATE_TEXGEN_OBJECT_R}, SWIZZLE_XYZW, true},
1084 {"gl_ObjectPlaneQ", NULL,
1085 {STATE_TEXGEN, 0, STATE_TEXGEN_OBJECT_Q}, SWIZZLE_XYZW, true},
1086
1087 {"gl_Fog", "color",
1088 {STATE_FOG_COLOR}, SWIZZLE_XYZW},
1089 {"gl_Fog", "density",
1090 {STATE_FOG_PARAMS}, SWIZZLE_XXXX},
1091 {"gl_Fog", "start",
1092 {STATE_FOG_PARAMS}, SWIZZLE_YYYY},
1093 {"gl_Fog", "end",
1094 {STATE_FOG_PARAMS}, SWIZZLE_ZZZZ},
1095 {"gl_Fog", "scale",
1096 {STATE_FOG_PARAMS}, SWIZZLE_WWWW},
1097 };
1098
1099 static ir_to_mesa_src_reg
1100 get_builtin_uniform_reg(struct gl_program *prog,
1101 const char *name, int array_index, const char *field)
1102 {
1103 unsigned int i;
1104 ir_to_mesa_src_reg src_reg;
1105 int tokens[STATE_LENGTH];
1106
1107 for (i = 0; i < Elements(statevars); i++) {
1108 if (strcmp(statevars[i].name, name) != 0)
1109 continue;
1110 if (!field && statevars[i].field) {
1111 assert(!"FINISHME: whole-structure state var dereference");
1112 }
1113 if (field && strcmp(statevars[i].field, field) != 0)
1114 continue;
1115 break;
1116 }
1117
1118 if (i == Elements(statevars)) {
1119 printf("builtin uniform %s%s%s not found\n",
1120 name,
1121 field ? "." : "",
1122 field ? field : "");
1123 abort();
1124 }
1125
1126 memcpy(&tokens, statevars[i].tokens, sizeof(tokens));
1127 if (statevars[i].array_indexed)
1128 tokens[1] = array_index;
1129
1130 src_reg.file = PROGRAM_STATE_VAR;
1131 src_reg.index = _mesa_add_state_reference(prog->Parameters,
1132 (gl_state_index *)tokens);
1133 src_reg.swizzle = statevars[i].swizzle;
1134 src_reg.negate = 0;
1135 src_reg.reladdr = false;
1136
1137 return src_reg;
1138 }
1139
1140 static int
1141 add_matrix_ref(struct gl_program *prog, int *tokens)
1142 {
1143 int base_pos = -1;
1144 int i;
1145
1146 /* Add a ref for each column. It looks like the reason we do
1147 * it this way is that _mesa_add_state_reference doesn't work
1148 * for things that aren't vec4s, so the tokens[2]/tokens[3]
1149 * range has to be equal.
1150 */
1151 for (i = 0; i < 4; i++) {
1152 tokens[2] = i;
1153 tokens[3] = i;
1154 int pos = _mesa_add_state_reference(prog->Parameters,
1155 (gl_state_index *)tokens);
1156 if (base_pos == -1)
1157 base_pos = pos;
1158 else
1159 assert(base_pos + i == pos);
1160 }
1161
1162 return base_pos;
1163 }
1164
1165 static variable_storage *
1166 get_builtin_matrix_ref(void *mem_ctx, struct gl_program *prog, ir_variable *var,
1167 ir_rvalue *array_index)
1168 {
1169 /*
1170 * NOTE: The ARB_vertex_program extension specified that matrices get
1171 * loaded in registers in row-major order. With GLSL, we want column-
1172 * major order. So, we need to transpose all matrices here...
1173 */
1174 static const struct {
1175 const char *name;
1176 int matrix;
1177 int modifier;
1178 } matrices[] = {
1179 { "gl_ModelViewMatrix", STATE_MODELVIEW_MATRIX, STATE_MATRIX_TRANSPOSE },
1180 { "gl_ModelViewMatrixInverse", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVTRANS },
1181 { "gl_ModelViewMatrixTranspose", STATE_MODELVIEW_MATRIX, 0 },
1182 { "gl_ModelViewMatrixInverseTranspose", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVERSE },
1183
1184 { "gl_ProjectionMatrix", STATE_PROJECTION_MATRIX, STATE_MATRIX_TRANSPOSE },
1185 { "gl_ProjectionMatrixInverse", STATE_PROJECTION_MATRIX, STATE_MATRIX_INVTRANS },
1186 { "gl_ProjectionMatrixTranspose", STATE_PROJECTION_MATRIX, 0 },
1187 { "gl_ProjectionMatrixInverseTranspose", STATE_PROJECTION_MATRIX, STATE_MATRIX_INVERSE },
1188
1189 { "gl_ModelViewProjectionMatrix", STATE_MVP_MATRIX, STATE_MATRIX_TRANSPOSE },
1190 { "gl_ModelViewProjectionMatrixInverse", STATE_MVP_MATRIX, STATE_MATRIX_INVTRANS },
1191 { "gl_ModelViewProjectionMatrixTranspose", STATE_MVP_MATRIX, 0 },
1192 { "gl_ModelViewProjectionMatrixInverseTranspose", STATE_MVP_MATRIX, STATE_MATRIX_INVERSE },
1193
1194 { "gl_TextureMatrix", STATE_TEXTURE_MATRIX, STATE_MATRIX_TRANSPOSE },
1195 { "gl_TextureMatrixInverse", STATE_TEXTURE_MATRIX, STATE_MATRIX_INVTRANS },
1196 { "gl_TextureMatrixTranspose", STATE_TEXTURE_MATRIX, 0 },
1197 { "gl_TextureMatrixInverseTranspose", STATE_TEXTURE_MATRIX, STATE_MATRIX_INVERSE },
1198
1199 { "gl_NormalMatrix", STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVERSE },
1200
1201 };
1202 unsigned int i;
1203 variable_storage *entry;
1204
1205 /* C++ gets angry when we try to use an int as a gl_state_index, so we use
1206 * ints for gl_state_index. Make sure they're compatible.
1207 */
1208 assert(sizeof(gl_state_index) == sizeof(int));
1209
1210 for (i = 0; i < Elements(matrices); i++) {
1211 if (strcmp(var->name, matrices[i].name) == 0) {
1212 int tokens[STATE_LENGTH];
1213 int base_pos = -1;
1214
1215 tokens[0] = matrices[i].matrix;
1216 tokens[4] = matrices[i].modifier;
1217 if (matrices[i].matrix == STATE_TEXTURE_MATRIX) {
1218 ir_constant *index = array_index->constant_expression_value();
1219 if (index) {
1220 tokens[1] = index->value.i[0];
1221 base_pos = add_matrix_ref(prog, tokens);
1222 } else {
1223 for (i = 0; i < var->type->length; i++) {
1224 tokens[1] = i;
1225 int pos = add_matrix_ref(prog, tokens);
1226 if (base_pos == -1)
1227 base_pos = pos;
1228 else
1229 assert(base_pos + (int)i * 4 == pos);
1230 }
1231 }
1232 } else {
1233 tokens[1] = 0; /* unused array index */
1234 base_pos = add_matrix_ref(prog, tokens);
1235 }
1236 tokens[4] = matrices[i].modifier;
1237
1238 entry = new(mem_ctx) variable_storage(var,
1239 PROGRAM_STATE_VAR,
1240 base_pos);
1241
1242 return entry;
1243 }
1244 }
1245
1246 return NULL;
1247 }
1248
1249 /* Recursively add all the members of the aggregate uniform as uniform names
1250 * to Mesa, moving those uniforms to our structured temporary.
1251 */
1252 void
1253 ir_to_mesa_visitor::add_aggregate_uniform(ir_instruction *ir,
1254 const char *name,
1255 const struct glsl_type *type,
1256 struct ir_to_mesa_dst_reg temp)
1257 {
1258 int loc;
1259
1260 if (type->is_record()) {
1261 void *mem_ctx = talloc_new(NULL);
1262
1263 for (unsigned int i = 0; i < type->length; i++) {
1264 const glsl_type *field_type = type->fields.structure[i].type;
1265 add_aggregate_uniform(ir,
1266 talloc_asprintf(mem_ctx, "%s.%s", name,
1267 type->fields.structure[i].name),
1268 field_type, temp);
1269 temp.index += type_size(field_type);
1270 }
1271
1272 talloc_free(mem_ctx);
1273
1274 return;
1275 }
1276
1277 assert(type->is_vector() || type->is_scalar() || !"FINISHME: other types");
1278
1279 int len;
1280
1281 if (type->is_vector() ||
1282 type->is_scalar()) {
1283 len = type->vector_elements;
1284 } else {
1285 len = type_size(type) * 4;
1286 }
1287
1288 loc = _mesa_add_uniform(this->prog->Parameters,
1289 name,
1290 len,
1291 type->gl_type,
1292 NULL);
1293
1294
1295 ir_to_mesa_src_reg uniform(PROGRAM_UNIFORM, loc, type);
1296
1297 for (int i = 0; i < type_size(type); i++) {
1298 ir_to_mesa_emit_op1(ir, OPCODE_MOV, temp, uniform);
1299 temp.index++;
1300 uniform.index++;
1301 }
1302 }
1303
1304
1305 void
1306 ir_to_mesa_visitor::visit(ir_dereference_variable *ir)
1307 {
1308 variable_storage *entry = find_variable_storage(ir->var);
1309 unsigned int loc;
1310 int len;
1311
1312 if (!entry) {
1313 switch (ir->var->mode) {
1314 case ir_var_uniform:
1315 entry = get_builtin_matrix_ref(this->mem_ctx, this->prog, ir->var,
1316 NULL);
1317 if (entry)
1318 break;
1319
1320 /* FINISHME: Fix up uniform name for arrays and things */
1321 if (ir->var->type->base_type == GLSL_TYPE_SAMPLER) {
1322 /* FINISHME: we whack the location of the var here, which
1323 * is probably not expected. But we need to communicate
1324 * mesa's sampler number to the tex instruction.
1325 */
1326 int sampler = _mesa_add_sampler(this->prog->Parameters,
1327 ir->var->name,
1328 ir->var->type->gl_type);
1329 map_sampler(ir->var->location, sampler);
1330
1331 entry = new(mem_ctx) variable_storage(ir->var, PROGRAM_SAMPLER,
1332 sampler);
1333 this->variables.push_tail(entry);
1334 break;
1335 }
1336
1337 assert(ir->var->type->gl_type != 0 &&
1338 ir->var->type->gl_type != GL_INVALID_ENUM);
1339
1340 /* Oh, the joy of aggregate types in Mesa. Like constants,
1341 * we can only really do vec4s. So, make a temp, chop the
1342 * aggregate up into vec4s, and move those vec4s to the temp.
1343 */
1344 if (ir->var->type->is_record()) {
1345 ir_to_mesa_src_reg temp = get_temp(ir->var->type);
1346
1347 entry = new(mem_ctx) variable_storage(ir->var,
1348 temp.file,
1349 temp.index);
1350 this->variables.push_tail(entry);
1351
1352 add_aggregate_uniform(ir->var, ir->var->name, ir->var->type,
1353 ir_to_mesa_dst_reg_from_src(temp));
1354 break;
1355 }
1356
1357 if (ir->var->type->is_vector() ||
1358 ir->var->type->is_scalar()) {
1359 len = ir->var->type->vector_elements;
1360 } else {
1361 len = type_size(ir->var->type) * 4;
1362 }
1363
1364 loc = _mesa_add_uniform(this->prog->Parameters,
1365 ir->var->name,
1366 len,
1367 ir->var->type->gl_type,
1368 NULL);
1369
1370 /* Always mark the uniform used at this point. If it isn't
1371 * used, dead code elimination should have nuked the decl already.
1372 */
1373 this->prog->Parameters->Parameters[loc].Used = GL_TRUE;
1374
1375 entry = new(mem_ctx) variable_storage(ir->var, PROGRAM_UNIFORM, loc);
1376 this->variables.push_tail(entry);
1377 break;
1378 case ir_var_in:
1379 case ir_var_out:
1380 case ir_var_inout:
1381 /* The linker assigns locations for varyings and attributes,
1382 * including deprecated builtins (like gl_Color), user-assign
1383 * generic attributes (glBindVertexLocation), and
1384 * user-defined varyings.
1385 *
1386 * FINISHME: We would hit this path for function arguments. Fix!
1387 */
1388 assert(ir->var->location != -1);
1389 if (ir->var->mode == ir_var_in ||
1390 ir->var->mode == ir_var_inout) {
1391 entry = new(mem_ctx) variable_storage(ir->var,
1392 PROGRAM_INPUT,
1393 ir->var->location);
1394
1395 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
1396 ir->var->location >= VERT_ATTRIB_GENERIC0) {
1397 _mesa_add_attribute(prog->Attributes,
1398 ir->var->name,
1399 type_size(ir->var->type) * 4,
1400 ir->var->type->gl_type,
1401 ir->var->location - VERT_ATTRIB_GENERIC0);
1402 }
1403 } else {
1404 entry = new(mem_ctx) variable_storage(ir->var,
1405 PROGRAM_OUTPUT,
1406 ir->var->location);
1407 }
1408
1409 break;
1410 case ir_var_auto:
1411 case ir_var_temporary:
1412 entry = new(mem_ctx) variable_storage(ir->var, PROGRAM_TEMPORARY,
1413 this->next_temp);
1414 this->variables.push_tail(entry);
1415
1416 next_temp += type_size(ir->var->type);
1417 break;
1418 }
1419
1420 if (!entry) {
1421 printf("Failed to make storage for %s\n", ir->var->name);
1422 exit(1);
1423 }
1424 }
1425
1426 this->result = ir_to_mesa_src_reg(entry->file, entry->index, ir->var->type);
1427 }
1428
1429 void
1430 ir_to_mesa_visitor::visit(ir_dereference_array *ir)
1431 {
1432 ir_variable *var = ir->variable_referenced();
1433 ir_constant *index;
1434 ir_to_mesa_src_reg src_reg;
1435 ir_dereference_variable *deref_var = ir->array->as_dereference_variable();
1436 int element_size = type_size(ir->type);
1437
1438 index = ir->array_index->constant_expression_value();
1439
1440 if (deref_var && strncmp(deref_var->var->name,
1441 "gl_TextureMatrix",
1442 strlen("gl_TextureMatrix")) == 0) {
1443 struct variable_storage *entry;
1444
1445 entry = get_builtin_matrix_ref(this->mem_ctx, this->prog, deref_var->var,
1446 ir->array_index);
1447 assert(entry);
1448
1449 ir_to_mesa_src_reg src_reg(entry->file, entry->index, ir->type);
1450
1451 if (index) {
1452 src_reg.reladdr = NULL;
1453 } else {
1454 ir_to_mesa_src_reg index_reg = get_temp(glsl_type::float_type);
1455
1456 ir->array_index->accept(this);
1457 ir_to_mesa_emit_op2(ir, OPCODE_MUL,
1458 ir_to_mesa_dst_reg_from_src(index_reg),
1459 this->result, src_reg_for_float(element_size));
1460
1461 src_reg.reladdr = talloc(mem_ctx, ir_to_mesa_src_reg);
1462 memcpy(src_reg.reladdr, &index_reg, sizeof(index_reg));
1463 }
1464
1465 this->result = src_reg;
1466 return;
1467 }
1468
1469 if (strncmp(var->name, "gl_", 3) == 0 && var->mode == ir_var_uniform &&
1470 !var->type->is_matrix()) {
1471 ir_dereference_record *record = NULL;
1472 if (ir->array->ir_type == ir_type_dereference_record)
1473 record = (ir_dereference_record *)ir->array;
1474
1475 assert(index || !"FINISHME: variable-indexed builtin uniform access");
1476
1477 this->result = get_builtin_uniform_reg(prog,
1478 var->name,
1479 index->value.i[0],
1480 record ? record->field : NULL);
1481 }
1482
1483 ir->array->accept(this);
1484 src_reg = this->result;
1485
1486 if (index) {
1487 src_reg.index += index->value.i[0] * element_size;
1488 } else {
1489 ir_to_mesa_src_reg array_base = this->result;
1490 /* Variable index array dereference. It eats the "vec4" of the
1491 * base of the array and an index that offsets the Mesa register
1492 * index.
1493 */
1494 ir->array_index->accept(this);
1495
1496 ir_to_mesa_src_reg index_reg;
1497
1498 if (element_size == 1) {
1499 index_reg = this->result;
1500 } else {
1501 index_reg = get_temp(glsl_type::float_type);
1502
1503 ir_to_mesa_emit_op2(ir, OPCODE_MUL,
1504 ir_to_mesa_dst_reg_from_src(index_reg),
1505 this->result, src_reg_for_float(element_size));
1506 }
1507
1508 src_reg.reladdr = talloc(mem_ctx, ir_to_mesa_src_reg);
1509 memcpy(src_reg.reladdr, &index_reg, sizeof(index_reg));
1510 }
1511
1512 /* If the type is smaller than a vec4, replicate the last channel out. */
1513 if (ir->type->is_scalar() || ir->type->is_vector())
1514 src_reg.swizzle = swizzle_for_size(ir->type->vector_elements);
1515 else
1516 src_reg.swizzle = SWIZZLE_NOOP;
1517
1518 this->result = src_reg;
1519 }
1520
1521 void
1522 ir_to_mesa_visitor::visit(ir_dereference_record *ir)
1523 {
1524 unsigned int i;
1525 const glsl_type *struct_type = ir->record->type;
1526 int offset = 0;
1527 ir_variable *var = ir->record->variable_referenced();
1528
1529 if (strncmp(var->name, "gl_", 3) == 0 && var->mode == ir_var_uniform) {
1530 assert(var);
1531
1532 this->result = get_builtin_uniform_reg(prog,
1533 var->name,
1534 0,
1535 ir->field);
1536 return;
1537 }
1538
1539 ir->record->accept(this);
1540
1541 for (i = 0; i < struct_type->length; i++) {
1542 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1543 break;
1544 offset += type_size(struct_type->fields.structure[i].type);
1545 }
1546 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1547 this->result.index += offset;
1548 }
1549
1550 /**
1551 * We want to be careful in assignment setup to hit the actual storage
1552 * instead of potentially using a temporary like we might with the
1553 * ir_dereference handler.
1554 *
1555 * Thanks to ir_swizzle_swizzle, and ir_vec_index_to_swizzle, we
1556 * should only see potentially one variable array index of a vector,
1557 * and one swizzle, before getting to actual vec4 storage. So handle
1558 * those, then go use ir_dereference to handle the rest.
1559 */
1560 static struct ir_to_mesa_dst_reg
1561 get_assignment_lhs(ir_instruction *ir, ir_to_mesa_visitor *v,
1562 ir_to_mesa_src_reg *r)
1563 {
1564 struct ir_to_mesa_dst_reg dst_reg;
1565 ir_swizzle *swiz;
1566
1567 ir_dereference_array *deref_array = ir->as_dereference_array();
1568 /* This should have been handled by ir_vec_index_to_cond_assign */
1569 if (deref_array) {
1570 assert(!deref_array->array->type->is_vector());
1571 }
1572
1573 /* Use the rvalue deref handler for the most part. We'll ignore
1574 * swizzles in it and write swizzles using writemask, though.
1575 */
1576 ir->accept(v);
1577 dst_reg = ir_to_mesa_dst_reg_from_src(v->result);
1578
1579 if ((swiz = ir->as_swizzle())) {
1580 int swizzles[4] = {
1581 swiz->mask.x,
1582 swiz->mask.y,
1583 swiz->mask.z,
1584 swiz->mask.w
1585 };
1586 int new_r_swizzle[4];
1587 int orig_r_swizzle = r->swizzle;
1588 int i;
1589
1590 for (i = 0; i < 4; i++) {
1591 new_r_swizzle[i] = GET_SWZ(orig_r_swizzle, 0);
1592 }
1593
1594 dst_reg.writemask = 0;
1595 for (i = 0; i < 4; i++) {
1596 if (i < swiz->mask.num_components) {
1597 dst_reg.writemask |= 1 << swizzles[i];
1598 new_r_swizzle[swizzles[i]] = GET_SWZ(orig_r_swizzle, i);
1599 }
1600 }
1601
1602 r->swizzle = MAKE_SWIZZLE4(new_r_swizzle[0],
1603 new_r_swizzle[1],
1604 new_r_swizzle[2],
1605 new_r_swizzle[3]);
1606 }
1607
1608 return dst_reg;
1609 }
1610
1611 void
1612 ir_to_mesa_visitor::visit(ir_assignment *ir)
1613 {
1614 struct ir_to_mesa_dst_reg l;
1615 struct ir_to_mesa_src_reg r;
1616 int i;
1617
1618 ir->rhs->accept(this);
1619 r = this->result;
1620
1621 l = get_assignment_lhs(ir->lhs, this, &r);
1622
1623 assert(l.file != PROGRAM_UNDEFINED);
1624 assert(r.file != PROGRAM_UNDEFINED);
1625
1626 if (ir->condition) {
1627 ir_to_mesa_src_reg condition;
1628
1629 ir->condition->accept(this);
1630 condition = this->result;
1631
1632 /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves,
1633 * and the condition we produced is 0.0 or 1.0. By flipping the
1634 * sign, we can choose which value OPCODE_CMP produces without
1635 * an extra computing the condition.
1636 */
1637 condition.negate = ~condition.negate;
1638 for (i = 0; i < type_size(ir->lhs->type); i++) {
1639 ir_to_mesa_emit_op3(ir, OPCODE_CMP, l,
1640 condition, r, ir_to_mesa_src_reg_from_dst(l));
1641 l.index++;
1642 r.index++;
1643 }
1644 } else {
1645 for (i = 0; i < type_size(ir->lhs->type); i++) {
1646 ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r);
1647 l.index++;
1648 r.index++;
1649 }
1650 }
1651 }
1652
1653
1654 void
1655 ir_to_mesa_visitor::visit(ir_constant *ir)
1656 {
1657 ir_to_mesa_src_reg src_reg;
1658 GLfloat stack_vals[4];
1659 GLfloat *values = stack_vals;
1660 unsigned int i;
1661
1662 /* Unfortunately, 4 floats is all we can get into
1663 * _mesa_add_unnamed_constant. So, make a temp to store an
1664 * aggregate constant and move each constant value into it. If we
1665 * get lucky, copy propagation will eliminate the extra moves.
1666 */
1667
1668 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1669 ir_to_mesa_src_reg temp_base = get_temp(ir->type);
1670 ir_to_mesa_dst_reg temp = ir_to_mesa_dst_reg_from_src(temp_base);
1671
1672 foreach_iter(exec_list_iterator, iter, ir->components) {
1673 ir_constant *field_value = (ir_constant *)iter.get();
1674 int size = type_size(field_value->type);
1675
1676 assert(size > 0);
1677
1678 field_value->accept(this);
1679 src_reg = this->result;
1680
1681 for (i = 0; i < (unsigned int)size; i++) {
1682 ir_to_mesa_emit_op1(ir, OPCODE_MOV, temp, src_reg);
1683
1684 src_reg.index++;
1685 temp.index++;
1686 }
1687 }
1688 this->result = temp_base;
1689 return;
1690 }
1691
1692 if (ir->type->is_array()) {
1693 ir_to_mesa_src_reg temp_base = get_temp(ir->type);
1694 ir_to_mesa_dst_reg temp = ir_to_mesa_dst_reg_from_src(temp_base);
1695 int size = type_size(ir->type->fields.array);
1696
1697 assert(size > 0);
1698
1699 for (i = 0; i < ir->type->length; i++) {
1700 ir->array_elements[i]->accept(this);
1701 src_reg = this->result;
1702 for (int j = 0; j < size; j++) {
1703 ir_to_mesa_emit_op1(ir, OPCODE_MOV, temp, src_reg);
1704
1705 src_reg.index++;
1706 temp.index++;
1707 }
1708 }
1709 this->result = temp_base;
1710 return;
1711 }
1712
1713 if (ir->type->is_matrix()) {
1714 ir_to_mesa_src_reg mat = get_temp(ir->type);
1715 ir_to_mesa_dst_reg mat_column = ir_to_mesa_dst_reg_from_src(mat);
1716
1717 for (i = 0; i < ir->type->matrix_columns; i++) {
1718 assert(ir->type->base_type == GLSL_TYPE_FLOAT);
1719 values = &ir->value.f[i * ir->type->vector_elements];
1720
1721 src_reg = ir_to_mesa_src_reg(PROGRAM_CONSTANT, -1, NULL);
1722 src_reg.index = _mesa_add_unnamed_constant(this->prog->Parameters,
1723 values,
1724 ir->type->vector_elements,
1725 &src_reg.swizzle);
1726 ir_to_mesa_emit_op1(ir, OPCODE_MOV, mat_column, src_reg);
1727
1728 mat_column.index++;
1729 }
1730
1731 this->result = mat;
1732 }
1733
1734 src_reg.file = PROGRAM_CONSTANT;
1735 switch (ir->type->base_type) {
1736 case GLSL_TYPE_FLOAT:
1737 values = &ir->value.f[0];
1738 break;
1739 case GLSL_TYPE_UINT:
1740 for (i = 0; i < ir->type->vector_elements; i++) {
1741 values[i] = ir->value.u[i];
1742 }
1743 break;
1744 case GLSL_TYPE_INT:
1745 for (i = 0; i < ir->type->vector_elements; i++) {
1746 values[i] = ir->value.i[i];
1747 }
1748 break;
1749 case GLSL_TYPE_BOOL:
1750 for (i = 0; i < ir->type->vector_elements; i++) {
1751 values[i] = ir->value.b[i];
1752 }
1753 break;
1754 default:
1755 assert(!"Non-float/uint/int/bool constant");
1756 }
1757
1758 this->result = ir_to_mesa_src_reg(PROGRAM_CONSTANT, -1, ir->type);
1759 this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters,
1760 values,
1761 ir->type->vector_elements,
1762 &this->result.swizzle);
1763 }
1764
1765 function_entry *
1766 ir_to_mesa_visitor::get_function_signature(ir_function_signature *sig)
1767 {
1768 function_entry *entry;
1769
1770 foreach_iter(exec_list_iterator, iter, this->function_signatures) {
1771 entry = (function_entry *)iter.get();
1772
1773 if (entry->sig == sig)
1774 return entry;
1775 }
1776
1777 entry = talloc(mem_ctx, function_entry);
1778 entry->sig = sig;
1779 entry->sig_id = this->next_signature_id++;
1780 entry->bgn_inst = NULL;
1781
1782 /* Allocate storage for all the parameters. */
1783 foreach_iter(exec_list_iterator, iter, sig->parameters) {
1784 ir_variable *param = (ir_variable *)iter.get();
1785 variable_storage *storage;
1786
1787 storage = find_variable_storage(param);
1788 assert(!storage);
1789
1790 storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY,
1791 this->next_temp);
1792 this->variables.push_tail(storage);
1793
1794 this->next_temp += type_size(param->type);
1795 }
1796
1797 if (!sig->return_type->is_void()) {
1798 entry->return_reg = get_temp(sig->return_type);
1799 } else {
1800 entry->return_reg = ir_to_mesa_undef;
1801 }
1802
1803 this->function_signatures.push_tail(entry);
1804 return entry;
1805 }
1806
1807 void
1808 ir_to_mesa_visitor::visit(ir_call *ir)
1809 {
1810 ir_to_mesa_instruction *call_inst;
1811 ir_function_signature *sig = ir->get_callee();
1812 function_entry *entry = get_function_signature(sig);
1813 int i;
1814
1815 /* Process in parameters. */
1816 exec_list_iterator sig_iter = sig->parameters.iterator();
1817 foreach_iter(exec_list_iterator, iter, *ir) {
1818 ir_rvalue *param_rval = (ir_rvalue *)iter.get();
1819 ir_variable *param = (ir_variable *)sig_iter.get();
1820
1821 if (param->mode == ir_var_in ||
1822 param->mode == ir_var_inout) {
1823 variable_storage *storage = find_variable_storage(param);
1824 assert(storage);
1825
1826 param_rval->accept(this);
1827 ir_to_mesa_src_reg r = this->result;
1828
1829 ir_to_mesa_dst_reg l;
1830 l.file = storage->file;
1831 l.index = storage->index;
1832 l.reladdr = NULL;
1833 l.writemask = WRITEMASK_XYZW;
1834 l.cond_mask = COND_TR;
1835
1836 for (i = 0; i < type_size(param->type); i++) {
1837 ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r);
1838 l.index++;
1839 r.index++;
1840 }
1841 }
1842
1843 sig_iter.next();
1844 }
1845 assert(!sig_iter.has_next());
1846
1847 /* Emit call instruction */
1848 call_inst = ir_to_mesa_emit_op1(ir, OPCODE_CAL,
1849 ir_to_mesa_undef_dst, ir_to_mesa_undef);
1850 call_inst->function = entry;
1851
1852 /* Process out parameters. */
1853 sig_iter = sig->parameters.iterator();
1854 foreach_iter(exec_list_iterator, iter, *ir) {
1855 ir_rvalue *param_rval = (ir_rvalue *)iter.get();
1856 ir_variable *param = (ir_variable *)sig_iter.get();
1857
1858 if (param->mode == ir_var_out ||
1859 param->mode == ir_var_inout) {
1860 variable_storage *storage = find_variable_storage(param);
1861 assert(storage);
1862
1863 ir_to_mesa_src_reg r;
1864 r.file = storage->file;
1865 r.index = storage->index;
1866 r.reladdr = NULL;
1867 r.swizzle = SWIZZLE_NOOP;
1868 r.negate = 0;
1869
1870 param_rval->accept(this);
1871 ir_to_mesa_dst_reg l = ir_to_mesa_dst_reg_from_src(this->result);
1872
1873 for (i = 0; i < type_size(param->type); i++) {
1874 ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r);
1875 l.index++;
1876 r.index++;
1877 }
1878 }
1879
1880 sig_iter.next();
1881 }
1882 assert(!sig_iter.has_next());
1883
1884 /* Process return value. */
1885 this->result = entry->return_reg;
1886 }
1887
1888
1889 void
1890 ir_to_mesa_visitor::visit(ir_texture *ir)
1891 {
1892 ir_to_mesa_src_reg result_src, coord, lod_info, projector;
1893 ir_to_mesa_dst_reg result_dst, coord_dst;
1894 ir_to_mesa_instruction *inst = NULL;
1895 prog_opcode opcode = OPCODE_NOP;
1896
1897 ir->coordinate->accept(this);
1898
1899 /* Put our coords in a temp. We'll need to modify them for shadow,
1900 * projection, or LOD, so the only case we'd use it as is is if
1901 * we're doing plain old texturing. Mesa IR optimization should
1902 * handle cleaning up our mess in that case.
1903 */
1904 coord = get_temp(glsl_type::vec4_type);
1905 coord_dst = ir_to_mesa_dst_reg_from_src(coord);
1906 ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst,
1907 this->result);
1908
1909 if (ir->projector) {
1910 ir->projector->accept(this);
1911 projector = this->result;
1912 }
1913
1914 /* Storage for our result. Ideally for an assignment we'd be using
1915 * the actual storage for the result here, instead.
1916 */
1917 result_src = get_temp(glsl_type::vec4_type);
1918 result_dst = ir_to_mesa_dst_reg_from_src(result_src);
1919
1920 switch (ir->op) {
1921 case ir_tex:
1922 opcode = OPCODE_TEX;
1923 break;
1924 case ir_txb:
1925 opcode = OPCODE_TXB;
1926 ir->lod_info.bias->accept(this);
1927 lod_info = this->result;
1928 break;
1929 case ir_txl:
1930 opcode = OPCODE_TXL;
1931 ir->lod_info.lod->accept(this);
1932 lod_info = this->result;
1933 break;
1934 case ir_txd:
1935 case ir_txf:
1936 assert(!"GLSL 1.30 features unsupported");
1937 break;
1938 }
1939
1940 if (ir->projector) {
1941 if (opcode == OPCODE_TEX) {
1942 /* Slot the projector in as the last component of the coord. */
1943 coord_dst.writemask = WRITEMASK_W;
1944 ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, projector);
1945 coord_dst.writemask = WRITEMASK_XYZW;
1946 opcode = OPCODE_TXP;
1947 } else {
1948 ir_to_mesa_src_reg coord_w = coord;
1949 coord_w.swizzle = SWIZZLE_WWWW;
1950
1951 /* For the other TEX opcodes there's no projective version
1952 * since the last slot is taken up by lod info. Do the
1953 * projective divide now.
1954 */
1955 coord_dst.writemask = WRITEMASK_W;
1956 ir_to_mesa_emit_op1(ir, OPCODE_RCP, coord_dst, projector);
1957
1958 coord_dst.writemask = WRITEMASK_XYZ;
1959 ir_to_mesa_emit_op2(ir, OPCODE_MUL, coord_dst, coord, coord_w);
1960
1961 coord_dst.writemask = WRITEMASK_XYZW;
1962 coord.swizzle = SWIZZLE_XYZW;
1963 }
1964 }
1965
1966 if (ir->shadow_comparitor) {
1967 /* Slot the shadow value in as the second to last component of the
1968 * coord.
1969 */
1970 ir->shadow_comparitor->accept(this);
1971 coord_dst.writemask = WRITEMASK_Z;
1972 ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, this->result);
1973 coord_dst.writemask = WRITEMASK_XYZW;
1974 }
1975
1976 if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) {
1977 /* Mesa IR stores lod or lod bias in the last channel of the coords. */
1978 coord_dst.writemask = WRITEMASK_W;
1979 ir_to_mesa_emit_op1(ir, OPCODE_MOV, coord_dst, lod_info);
1980 coord_dst.writemask = WRITEMASK_XYZW;
1981 }
1982
1983 inst = ir_to_mesa_emit_op1(ir, opcode, result_dst, coord);
1984
1985 if (ir->shadow_comparitor)
1986 inst->tex_shadow = GL_TRUE;
1987
1988 ir_dereference_variable *sampler = ir->sampler->as_dereference_variable();
1989 assert(sampler); /* FINISHME: sampler arrays */
1990 /* generate the mapping, remove when we generate storage at
1991 * declaration time
1992 */
1993 sampler->accept(this);
1994
1995 inst->sampler = get_sampler_number(sampler->var->location);
1996
1997 switch (sampler->type->sampler_dimensionality) {
1998 case GLSL_SAMPLER_DIM_1D:
1999 inst->tex_target = TEXTURE_1D_INDEX;
2000 break;
2001 case GLSL_SAMPLER_DIM_2D:
2002 inst->tex_target = TEXTURE_2D_INDEX;
2003 break;
2004 case GLSL_SAMPLER_DIM_3D:
2005 inst->tex_target = TEXTURE_3D_INDEX;
2006 break;
2007 case GLSL_SAMPLER_DIM_CUBE:
2008 inst->tex_target = TEXTURE_CUBE_INDEX;
2009 break;
2010 default:
2011 assert(!"FINISHME: other texture targets");
2012 }
2013
2014 this->result = result_src;
2015 }
2016
2017 void
2018 ir_to_mesa_visitor::visit(ir_return *ir)
2019 {
2020 assert(current_function);
2021
2022 if (ir->get_value()) {
2023 ir_to_mesa_dst_reg l;
2024 int i;
2025
2026 ir->get_value()->accept(this);
2027 ir_to_mesa_src_reg r = this->result;
2028
2029 l = ir_to_mesa_dst_reg_from_src(current_function->return_reg);
2030
2031 for (i = 0; i < type_size(current_function->sig->return_type); i++) {
2032 ir_to_mesa_emit_op1(ir, OPCODE_MOV, l, r);
2033 l.index++;
2034 r.index++;
2035 }
2036 }
2037
2038 ir_to_mesa_emit_op0(ir, OPCODE_RET);
2039 }
2040
2041 void
2042 ir_to_mesa_visitor::visit(ir_discard *ir)
2043 {
2044 assert(ir->condition == NULL); /* FINISHME */
2045
2046 ir_to_mesa_emit_op0(ir, OPCODE_KIL_NV);
2047 }
2048
2049 void
2050 ir_to_mesa_visitor::visit(ir_if *ir)
2051 {
2052 ir_to_mesa_instruction *cond_inst, *if_inst, *else_inst = NULL;
2053 ir_to_mesa_instruction *prev_inst;
2054
2055 prev_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
2056
2057 ir->condition->accept(this);
2058 assert(this->result.file != PROGRAM_UNDEFINED);
2059
2060 if (ctx->Shader.EmitCondCodes) {
2061 cond_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
2062
2063 /* See if we actually generated any instruction for generating
2064 * the condition. If not, then cook up a move to a temp so we
2065 * have something to set cond_update on.
2066 */
2067 if (cond_inst == prev_inst) {
2068 ir_to_mesa_src_reg temp = get_temp(glsl_type::bool_type);
2069 cond_inst = ir_to_mesa_emit_op1(ir->condition, OPCODE_MOV,
2070 ir_to_mesa_dst_reg_from_src(temp),
2071 result);
2072 }
2073 cond_inst->cond_update = GL_TRUE;
2074
2075 if_inst = ir_to_mesa_emit_op0(ir->condition, OPCODE_IF);
2076 if_inst->dst_reg.cond_mask = COND_NE;
2077 } else {
2078 if_inst = ir_to_mesa_emit_op1(ir->condition,
2079 OPCODE_IF, ir_to_mesa_undef_dst,
2080 this->result);
2081 }
2082
2083 this->instructions.push_tail(if_inst);
2084
2085 visit_exec_list(&ir->then_instructions, this);
2086
2087 if (!ir->else_instructions.is_empty()) {
2088 else_inst = ir_to_mesa_emit_op0(ir->condition, OPCODE_ELSE);
2089 visit_exec_list(&ir->else_instructions, this);
2090 }
2091
2092 if_inst = ir_to_mesa_emit_op1(ir->condition, OPCODE_ENDIF,
2093 ir_to_mesa_undef_dst, ir_to_mesa_undef);
2094 }
2095
2096 ir_to_mesa_visitor::ir_to_mesa_visitor()
2097 {
2098 result.file = PROGRAM_UNDEFINED;
2099 next_temp = 1;
2100 next_signature_id = 1;
2101 sampler_map = NULL;
2102 sampler_map_size = 0;
2103 current_function = NULL;
2104 }
2105
2106 static struct prog_src_register
2107 mesa_src_reg_from_ir_src_reg(ir_to_mesa_src_reg reg)
2108 {
2109 struct prog_src_register mesa_reg;
2110
2111 mesa_reg.File = reg.file;
2112 assert(reg.index < (1 << INST_INDEX_BITS) - 1);
2113 mesa_reg.Index = reg.index;
2114 mesa_reg.Swizzle = reg.swizzle;
2115 mesa_reg.RelAddr = reg.reladdr != NULL;
2116 mesa_reg.Negate = reg.negate;
2117 mesa_reg.Abs = 0;
2118
2119 return mesa_reg;
2120 }
2121
2122 static void
2123 set_branchtargets(ir_to_mesa_visitor *v,
2124 struct prog_instruction *mesa_instructions,
2125 int num_instructions)
2126 {
2127 int if_count = 0, loop_count = 0;
2128 int *if_stack, *loop_stack;
2129 int if_stack_pos = 0, loop_stack_pos = 0;
2130 int i, j;
2131
2132 for (i = 0; i < num_instructions; i++) {
2133 switch (mesa_instructions[i].Opcode) {
2134 case OPCODE_IF:
2135 if_count++;
2136 break;
2137 case OPCODE_BGNLOOP:
2138 loop_count++;
2139 break;
2140 case OPCODE_BRK:
2141 case OPCODE_CONT:
2142 mesa_instructions[i].BranchTarget = -1;
2143 break;
2144 default:
2145 break;
2146 }
2147 }
2148
2149 if_stack = (int *)calloc(if_count, sizeof(*if_stack));
2150 loop_stack = (int *)calloc(loop_count, sizeof(*loop_stack));
2151
2152 for (i = 0; i < num_instructions; i++) {
2153 switch (mesa_instructions[i].Opcode) {
2154 case OPCODE_IF:
2155 if_stack[if_stack_pos] = i;
2156 if_stack_pos++;
2157 break;
2158 case OPCODE_ELSE:
2159 mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
2160 if_stack[if_stack_pos - 1] = i;
2161 break;
2162 case OPCODE_ENDIF:
2163 mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
2164 if_stack_pos--;
2165 break;
2166 case OPCODE_BGNLOOP:
2167 loop_stack[loop_stack_pos] = i;
2168 loop_stack_pos++;
2169 break;
2170 case OPCODE_ENDLOOP:
2171 loop_stack_pos--;
2172 /* Rewrite any breaks/conts at this nesting level (haven't
2173 * already had a BranchTarget assigned) to point to the end
2174 * of the loop.
2175 */
2176 for (j = loop_stack[loop_stack_pos]; j < i; j++) {
2177 if (mesa_instructions[j].Opcode == OPCODE_BRK ||
2178 mesa_instructions[j].Opcode == OPCODE_CONT) {
2179 if (mesa_instructions[j].BranchTarget == -1) {
2180 mesa_instructions[j].BranchTarget = i;
2181 }
2182 }
2183 }
2184 /* The loop ends point at each other. */
2185 mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos];
2186 mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i;
2187 break;
2188 case OPCODE_CAL:
2189 foreach_iter(exec_list_iterator, iter, v->function_signatures) {
2190 function_entry *entry = (function_entry *)iter.get();
2191
2192 if (entry->sig_id == mesa_instructions[i].BranchTarget) {
2193 mesa_instructions[i].BranchTarget = entry->inst;
2194 break;
2195 }
2196 }
2197 break;
2198 default:
2199 break;
2200 }
2201 }
2202
2203 free(if_stack);
2204 }
2205
2206 static void
2207 print_program(struct prog_instruction *mesa_instructions,
2208 ir_instruction **mesa_instruction_annotation,
2209 int num_instructions)
2210 {
2211 ir_instruction *last_ir = NULL;
2212 int i;
2213 int indent = 0;
2214
2215 for (i = 0; i < num_instructions; i++) {
2216 struct prog_instruction *mesa_inst = mesa_instructions + i;
2217 ir_instruction *ir = mesa_instruction_annotation[i];
2218
2219 fprintf(stdout, "%3d: ", i);
2220
2221 if (last_ir != ir && ir) {
2222 int j;
2223
2224 for (j = 0; j < indent; j++) {
2225 fprintf(stdout, " ");
2226 }
2227 ir->print();
2228 printf("\n");
2229 last_ir = ir;
2230
2231 fprintf(stdout, " "); /* line number spacing. */
2232 }
2233
2234 indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent,
2235 PROG_PRINT_DEBUG, NULL);
2236 }
2237 }
2238
2239 static void
2240 mark_input(struct gl_program *prog,
2241 int index,
2242 GLboolean reladdr)
2243 {
2244 prog->InputsRead |= BITFIELD64_BIT(index);
2245 int i;
2246
2247 if (reladdr) {
2248 if (index >= FRAG_ATTRIB_TEX0 && index <= FRAG_ATTRIB_TEX7) {
2249 for (i = 0; i < 8; i++) {
2250 prog->InputsRead |= BITFIELD64_BIT(FRAG_ATTRIB_TEX0 + i);
2251 }
2252 } else {
2253 assert(!"FINISHME: Mark InputsRead for varying arrays");
2254 }
2255 }
2256 }
2257
2258 static void
2259 mark_output(struct gl_program *prog,
2260 int index,
2261 GLboolean reladdr)
2262 {
2263 prog->OutputsWritten |= BITFIELD64_BIT(index);
2264 int i;
2265
2266 if (reladdr) {
2267 if (index >= VERT_RESULT_TEX0 && index <= VERT_RESULT_TEX7) {
2268 for (i = 0; i < 8; i++) {
2269 prog->OutputsWritten |= BITFIELD64_BIT(FRAG_ATTRIB_TEX0 + i);
2270 }
2271 } else {
2272 assert(!"FINISHME: Mark OutputsWritten for varying arrays");
2273 }
2274 }
2275 }
2276
2277 static void
2278 count_resources(struct gl_program *prog)
2279 {
2280 unsigned int i;
2281
2282 prog->InputsRead = 0;
2283 prog->OutputsWritten = 0;
2284 prog->SamplersUsed = 0;
2285
2286 for (i = 0; i < prog->NumInstructions; i++) {
2287 struct prog_instruction *inst = &prog->Instructions[i];
2288 unsigned int reg;
2289
2290 switch (inst->DstReg.File) {
2291 case PROGRAM_OUTPUT:
2292 mark_output(prog, inst->DstReg.Index, inst->DstReg.RelAddr);
2293 break;
2294 case PROGRAM_INPUT:
2295 mark_input(prog, inst->DstReg.Index, inst->DstReg.RelAddr);
2296 break;
2297 default:
2298 break;
2299 }
2300
2301 for (reg = 0; reg < _mesa_num_inst_src_regs(inst->Opcode); reg++) {
2302 switch (inst->SrcReg[reg].File) {
2303 case PROGRAM_OUTPUT:
2304 mark_output(prog, inst->SrcReg[reg].Index,
2305 inst->SrcReg[reg].RelAddr);
2306 break;
2307 case PROGRAM_INPUT:
2308 mark_input(prog, inst->SrcReg[reg].Index, inst->SrcReg[reg].RelAddr);
2309 break;
2310 default:
2311 break;
2312 }
2313 }
2314
2315 /* Instead of just using the uniform's value to map to a
2316 * sampler, Mesa first allocates a separate number for the
2317 * sampler (_mesa_add_sampler), then we reindex it down to a
2318 * small integer (sampler_map[], SamplersUsed), then that gets
2319 * mapped to the uniform's value, and we get an actual sampler.
2320 */
2321 if (_mesa_is_tex_instruction(inst->Opcode)) {
2322 prog->SamplerTargets[inst->TexSrcUnit] =
2323 (gl_texture_index)inst->TexSrcTarget;
2324 prog->SamplersUsed |= 1 << inst->TexSrcUnit;
2325 if (inst->TexShadow) {
2326 prog->ShadowSamplers |= 1 << inst->TexSrcUnit;
2327 }
2328 }
2329 }
2330
2331 _mesa_update_shader_textures_used(prog);
2332 }
2333
2334 /* Each stage has some uniforms in its Parameters list. The Uniforms
2335 * list for the linked shader program has a pointer to these uniforms
2336 * in each of the stage's Parameters list, so that their values can be
2337 * updated when a uniform is set.
2338 */
2339 static void
2340 link_uniforms_to_shared_uniform_list(struct gl_uniform_list *uniforms,
2341 struct gl_program *prog)
2342 {
2343 unsigned int i;
2344
2345 for (i = 0; i < prog->Parameters->NumParameters; i++) {
2346 const struct gl_program_parameter *p = prog->Parameters->Parameters + i;
2347
2348 if (p->Type == PROGRAM_UNIFORM || p->Type == PROGRAM_SAMPLER) {
2349 struct gl_uniform *uniform =
2350 _mesa_append_uniform(uniforms, p->Name, prog->Target, i);
2351 if (uniform)
2352 uniform->Initialized = p->Initialized;
2353 }
2354 }
2355 }
2356
2357 struct gl_program *
2358 get_mesa_program(GLcontext *ctx, struct gl_shader_program *shader_program,
2359 struct gl_shader *shader)
2360 {
2361 void *mem_ctx = shader_program;
2362 ir_to_mesa_visitor v;
2363 struct prog_instruction *mesa_instructions, *mesa_inst;
2364 ir_instruction **mesa_instruction_annotation;
2365 int i;
2366 struct gl_program *prog;
2367 GLenum target;
2368 const char *target_string;
2369 GLboolean progress;
2370
2371 switch (shader->Type) {
2372 case GL_VERTEX_SHADER:
2373 target = GL_VERTEX_PROGRAM_ARB;
2374 target_string = "vertex";
2375 break;
2376 case GL_FRAGMENT_SHADER:
2377 target = GL_FRAGMENT_PROGRAM_ARB;
2378 target_string = "fragment";
2379 break;
2380 default:
2381 assert(!"should not be reached");
2382 break;
2383 }
2384
2385 validate_ir_tree(shader->ir);
2386
2387 prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
2388 if (!prog)
2389 return NULL;
2390 prog->Parameters = _mesa_new_parameter_list();
2391 prog->Varying = _mesa_new_parameter_list();
2392 prog->Attributes = _mesa_new_parameter_list();
2393 v.ctx = ctx;
2394 v.prog = prog;
2395
2396 v.mem_ctx = talloc_new(NULL);
2397
2398 /* Emit Mesa IR for main(). */
2399 visit_exec_list(shader->ir, &v);
2400 v.ir_to_mesa_emit_op0(NULL, OPCODE_END);
2401
2402 /* Now emit bodies for any functions that were used. */
2403 do {
2404 progress = GL_FALSE;
2405
2406 foreach_iter(exec_list_iterator, iter, v.function_signatures) {
2407 function_entry *entry = (function_entry *)iter.get();
2408
2409 if (!entry->bgn_inst) {
2410 v.current_function = entry;
2411
2412 entry->bgn_inst = v.ir_to_mesa_emit_op0(NULL, OPCODE_BGNSUB);
2413 entry->bgn_inst->function = entry;
2414
2415 visit_exec_list(&entry->sig->body, &v);
2416
2417 ir_to_mesa_instruction *last;
2418 last = (ir_to_mesa_instruction *)v.instructions.get_tail();
2419 if (last->op != OPCODE_RET)
2420 v.ir_to_mesa_emit_op0(NULL, OPCODE_RET);
2421
2422 ir_to_mesa_instruction *end;
2423 end = v.ir_to_mesa_emit_op0(NULL, OPCODE_ENDSUB);
2424 end->function = entry;
2425
2426 progress = GL_TRUE;
2427 }
2428 }
2429 } while (progress);
2430
2431 prog->NumTemporaries = v.next_temp;
2432
2433 int num_instructions = 0;
2434 foreach_iter(exec_list_iterator, iter, v.instructions) {
2435 num_instructions++;
2436 }
2437
2438 mesa_instructions =
2439 (struct prog_instruction *)calloc(num_instructions,
2440 sizeof(*mesa_instructions));
2441 mesa_instruction_annotation = talloc_array(mem_ctx, ir_instruction *,
2442 num_instructions);
2443
2444 mesa_inst = mesa_instructions;
2445 i = 0;
2446 foreach_iter(exec_list_iterator, iter, v.instructions) {
2447 ir_to_mesa_instruction *inst = (ir_to_mesa_instruction *)iter.get();
2448
2449 mesa_inst->Opcode = inst->op;
2450 mesa_inst->CondUpdate = inst->cond_update;
2451 mesa_inst->DstReg.File = inst->dst_reg.file;
2452 mesa_inst->DstReg.Index = inst->dst_reg.index;
2453 mesa_inst->DstReg.CondMask = inst->dst_reg.cond_mask;
2454 mesa_inst->DstReg.WriteMask = inst->dst_reg.writemask;
2455 mesa_inst->DstReg.RelAddr = inst->dst_reg.reladdr != NULL;
2456 mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src_reg[0]);
2457 mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src_reg[1]);
2458 mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src_reg[2]);
2459 mesa_inst->TexSrcUnit = inst->sampler;
2460 mesa_inst->TexSrcTarget = inst->tex_target;
2461 mesa_inst->TexShadow = inst->tex_shadow;
2462 mesa_instruction_annotation[i] = inst->ir;
2463
2464 if (ctx->Shader.EmitNoIfs && mesa_inst->Opcode == OPCODE_IF) {
2465 shader_program->InfoLog =
2466 talloc_asprintf_append(shader_program->InfoLog,
2467 "Couldn't flatten if statement\n");
2468 shader_program->LinkStatus = false;
2469 }
2470
2471 switch (mesa_inst->Opcode) {
2472 case OPCODE_BGNSUB:
2473 inst->function->inst = i;
2474 mesa_inst->Comment = strdup(inst->function->sig->function_name());
2475 break;
2476 case OPCODE_ENDSUB:
2477 mesa_inst->Comment = strdup(inst->function->sig->function_name());
2478 break;
2479 case OPCODE_CAL:
2480 mesa_inst->BranchTarget = inst->function->sig_id; /* rewritten later */
2481 break;
2482 case OPCODE_ARL:
2483 prog->NumAddressRegs = 1;
2484 break;
2485 default:
2486 break;
2487 }
2488
2489 mesa_inst++;
2490 i++;
2491 }
2492
2493 set_branchtargets(&v, mesa_instructions, num_instructions);
2494 if (ctx->Shader.Flags & GLSL_DUMP) {
2495 printf("Mesa %s program:\n", target_string);
2496 print_program(mesa_instructions, mesa_instruction_annotation,
2497 num_instructions);
2498 }
2499
2500 prog->Instructions = mesa_instructions;
2501 prog->NumInstructions = num_instructions;
2502
2503 _mesa_reference_program(ctx, &shader->Program, prog);
2504
2505 if ((ctx->Shader.Flags & GLSL_NO_OPT) == 0) {
2506 _mesa_optimize_program(ctx, prog);
2507 }
2508
2509 return prog;
2510 }
2511
2512 extern "C" {
2513
2514 void
2515 _mesa_glsl_compile_shader(GLcontext *ctx, struct gl_shader *shader)
2516 {
2517 struct _mesa_glsl_parse_state *state =
2518 new(shader) _mesa_glsl_parse_state(ctx, shader->Type, shader);
2519
2520 const char *source = shader->Source;
2521 state->error = preprocess(state, &source, &state->info_log,
2522 &ctx->Extensions);
2523
2524 if (!state->error) {
2525 _mesa_glsl_lexer_ctor(state, source);
2526 _mesa_glsl_parse(state);
2527 _mesa_glsl_lexer_dtor(state);
2528 }
2529
2530 shader->ir = new(shader) exec_list;
2531 if (!state->error && !state->translation_unit.is_empty())
2532 _mesa_ast_to_hir(shader->ir, state);
2533
2534 if (!state->error && !shader->ir->is_empty()) {
2535 validate_ir_tree(shader->ir);
2536
2537 /* Lowering */
2538 do_mat_op_to_vec(shader->ir);
2539 do_mod_to_fract(shader->ir);
2540 do_div_to_mul_rcp(shader->ir);
2541
2542 /* Optimization passes */
2543 bool progress;
2544 do {
2545 progress = false;
2546
2547 progress = do_function_inlining(shader->ir) || progress;
2548 progress = do_if_simplification(shader->ir) || progress;
2549 progress = do_copy_propagation(shader->ir) || progress;
2550 progress = do_dead_code_local(shader->ir) || progress;
2551 progress = do_dead_code_unlinked(shader->ir) || progress;
2552 progress = do_tree_grafting(shader->ir) || progress;
2553 progress = do_constant_variable_unlinked(shader->ir) || progress;
2554 progress = do_constant_folding(shader->ir) || progress;
2555 progress = do_algebraic(shader->ir) || progress;
2556 progress = do_if_return(shader->ir) || progress;
2557 if (ctx->Shader.EmitNoIfs)
2558 progress = do_if_to_cond_assign(shader->ir) || progress;
2559
2560 progress = do_vec_index_to_swizzle(shader->ir) || progress;
2561 /* Do this one after the previous to let the easier pass handle
2562 * constant vector indexing.
2563 */
2564 progress = do_vec_index_to_cond_assign(shader->ir) || progress;
2565
2566 progress = do_swizzle_swizzle(shader->ir) || progress;
2567 } while (progress);
2568
2569 validate_ir_tree(shader->ir);
2570 }
2571
2572 shader->symbols = state->symbols;
2573
2574 shader->CompileStatus = !state->error;
2575 shader->InfoLog = state->info_log;
2576 shader->Version = state->language_version;
2577 memcpy(shader->builtins_to_link, state->builtins_to_link,
2578 sizeof(shader->builtins_to_link[0]) * state->num_builtins_to_link);
2579 shader->num_builtins_to_link = state->num_builtins_to_link;
2580
2581 if (ctx->Shader.Flags & GLSL_LOG) {
2582 _mesa_write_shader_to_file(shader);
2583 }
2584
2585 /* Retain any live IR, but trash the rest. */
2586 reparent_ir(shader->ir, shader);
2587
2588 talloc_free(state);
2589 }
2590
2591 void
2592 _mesa_glsl_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
2593 {
2594 unsigned int i;
2595
2596 _mesa_clear_shader_program_data(ctx, prog);
2597
2598 prog->LinkStatus = GL_TRUE;
2599
2600 for (i = 0; i < prog->NumShaders; i++) {
2601 if (!prog->Shaders[i]->CompileStatus) {
2602 prog->InfoLog =
2603 talloc_asprintf_append(prog->InfoLog,
2604 "linking with uncompiled shader");
2605 prog->LinkStatus = GL_FALSE;
2606 }
2607 }
2608
2609 prog->Varying = _mesa_new_parameter_list();
2610 _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL);
2611 _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL);
2612
2613 if (prog->LinkStatus) {
2614 link_shaders(prog);
2615
2616 /* We don't use the linker's uniforms list, and cook up our own at
2617 * generate time.
2618 */
2619 free(prog->Uniforms);
2620 prog->Uniforms = _mesa_new_uniform_list();
2621 }
2622
2623 if (prog->LinkStatus) {
2624 for (i = 0; i < prog->_NumLinkedShaders; i++) {
2625 struct gl_program *linked_prog;
2626 bool ok = true;
2627
2628 linked_prog = get_mesa_program(ctx, prog,
2629 prog->_LinkedShaders[i]);
2630 count_resources(linked_prog);
2631
2632 link_uniforms_to_shared_uniform_list(prog->Uniforms, linked_prog);
2633
2634 switch (prog->_LinkedShaders[i]->Type) {
2635 case GL_VERTEX_SHADER:
2636 _mesa_reference_vertprog(ctx, &prog->VertexProgram,
2637 (struct gl_vertex_program *)linked_prog);
2638 ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB,
2639 linked_prog);
2640 break;
2641 case GL_FRAGMENT_SHADER:
2642 _mesa_reference_fragprog(ctx, &prog->FragmentProgram,
2643 (struct gl_fragment_program *)linked_prog);
2644 ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB,
2645 linked_prog);
2646 break;
2647 }
2648 if (!ok) {
2649 prog->LinkStatus = GL_FALSE;
2650 }
2651 }
2652 }
2653 }
2654
2655 } /* extern "C" */