1c674ea8756f7fb4c24229fec4279a6092f9afb2
[mesa.git] / src / mesa / program / ir_to_mesa.cpp
1 /*
2 * Copyright (C) 2005-2007 Brian Paul All Rights Reserved.
3 * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
4 * Copyright © 2010 Intel Corporation
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
24 */
25
26 /**
27 * \file ir_to_mesa.cpp
28 *
29 * Translate GLSL IR to Mesa's gl_program representation.
30 */
31
32 #include <stdio.h>
33 #include "main/compiler.h"
34 #include "ir.h"
35 #include "ir_visitor.h"
36 #include "ir_print_visitor.h"
37 #include "ir_expression_flattening.h"
38 #include "glsl_types.h"
39 #include "glsl_parser_extras.h"
40 #include "../glsl/program.h"
41 #include "ir_optimization.h"
42 #include "ast.h"
43
44 extern "C" {
45 #include "main/mtypes.h"
46 #include "main/shaderapi.h"
47 #include "main/shaderobj.h"
48 #include "main/uniforms.h"
49 #include "program/hash_table.h"
50 #include "program/prog_instruction.h"
51 #include "program/prog_optimize.h"
52 #include "program/prog_print.h"
53 #include "program/program.h"
54 #include "program/prog_uniform.h"
55 #include "program/prog_parameter.h"
56 #include "program/sampler.h"
57 }
58
59 class src_reg;
60 class dst_reg;
61
62 static int swizzle_for_size(int size);
63
64 /**
65 * This struct is a corresponding struct to Mesa prog_src_register, with
66 * wider fields.
67 */
68 class src_reg {
69 public:
70 src_reg(gl_register_file file, int index, const glsl_type *type)
71 {
72 this->file = file;
73 this->index = index;
74 if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
75 this->swizzle = swizzle_for_size(type->vector_elements);
76 else
77 this->swizzle = SWIZZLE_XYZW;
78 this->negate = 0;
79 this->reladdr = NULL;
80 }
81
82 src_reg()
83 {
84 this->file = PROGRAM_UNDEFINED;
85 this->index = 0;
86 this->swizzle = 0;
87 this->negate = 0;
88 this->reladdr = NULL;
89 }
90
91 explicit src_reg(dst_reg reg);
92
93 gl_register_file file; /**< PROGRAM_* from Mesa */
94 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
95 GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
96 int negate; /**< NEGATE_XYZW mask from mesa */
97 /** Register index should be offset by the integer in this reg. */
98 src_reg *reladdr;
99 };
100
101 class dst_reg {
102 public:
103 dst_reg(gl_register_file file, int writemask)
104 {
105 this->file = file;
106 this->index = 0;
107 this->writemask = writemask;
108 this->cond_mask = COND_TR;
109 this->reladdr = NULL;
110 }
111
112 dst_reg()
113 {
114 this->file = PROGRAM_UNDEFINED;
115 this->index = 0;
116 this->writemask = 0;
117 this->cond_mask = COND_TR;
118 this->reladdr = NULL;
119 }
120
121 explicit dst_reg(src_reg reg);
122
123 gl_register_file file; /**< PROGRAM_* from Mesa */
124 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
125 int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
126 GLuint cond_mask:4;
127 /** Register index should be offset by the integer in this reg. */
128 src_reg *reladdr;
129 };
130
131 src_reg::src_reg(dst_reg reg)
132 {
133 this->file = reg.file;
134 this->index = reg.index;
135 this->swizzle = SWIZZLE_XYZW;
136 this->negate = 0;
137 this->reladdr = reg.reladdr;
138 }
139
140 dst_reg::dst_reg(src_reg reg)
141 {
142 this->file = reg.file;
143 this->index = reg.index;
144 this->writemask = WRITEMASK_XYZW;
145 this->cond_mask = COND_TR;
146 this->reladdr = reg.reladdr;
147 }
148
149 class ir_to_mesa_instruction : public exec_node {
150 public:
151 /* Callers of this ralloc-based new need not call delete. It's
152 * easier to just ralloc_free 'ctx' (or any of its ancestors). */
153 static void* operator new(size_t size, void *ctx)
154 {
155 void *node;
156
157 node = rzalloc_size(ctx, size);
158 assert(node != NULL);
159
160 return node;
161 }
162
163 enum prog_opcode op;
164 dst_reg dst;
165 src_reg src[3];
166 /** Pointer to the ir source this tree came from for debugging */
167 ir_instruction *ir;
168 GLboolean cond_update;
169 bool saturate;
170 int sampler; /**< sampler index */
171 int tex_target; /**< One of TEXTURE_*_INDEX */
172 GLboolean tex_shadow;
173
174 class function_entry *function; /* Set on OPCODE_CAL or OPCODE_BGNSUB */
175 };
176
177 class variable_storage : public exec_node {
178 public:
179 variable_storage(ir_variable *var, gl_register_file file, int index)
180 : file(file), index(index), var(var)
181 {
182 /* empty */
183 }
184
185 gl_register_file file;
186 int index;
187 ir_variable *var; /* variable that maps to this, if any */
188 };
189
190 class function_entry : public exec_node {
191 public:
192 ir_function_signature *sig;
193
194 /**
195 * identifier of this function signature used by the program.
196 *
197 * At the point that Mesa instructions for function calls are
198 * generated, we don't know the address of the first instruction of
199 * the function body. So we make the BranchTarget that is called a
200 * small integer and rewrite them during set_branchtargets().
201 */
202 int sig_id;
203
204 /**
205 * Pointer to first instruction of the function body.
206 *
207 * Set during function body emits after main() is processed.
208 */
209 ir_to_mesa_instruction *bgn_inst;
210
211 /**
212 * Index of the first instruction of the function body in actual
213 * Mesa IR.
214 *
215 * Set after convertion from ir_to_mesa_instruction to prog_instruction.
216 */
217 int inst;
218
219 /** Storage for the return value. */
220 src_reg return_reg;
221 };
222
223 class ir_to_mesa_visitor : public ir_visitor {
224 public:
225 ir_to_mesa_visitor();
226 ~ir_to_mesa_visitor();
227
228 function_entry *current_function;
229
230 struct gl_context *ctx;
231 struct gl_program *prog;
232 struct gl_shader_program *shader_program;
233 struct gl_shader_compiler_options *options;
234
235 int next_temp;
236
237 variable_storage *find_variable_storage(ir_variable *var);
238
239 function_entry *get_function_signature(ir_function_signature *sig);
240
241 src_reg get_temp(const glsl_type *type);
242 void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);
243
244 src_reg src_reg_for_float(float val);
245
246 /**
247 * \name Visit methods
248 *
249 * As typical for the visitor pattern, there must be one \c visit method for
250 * each concrete subclass of \c ir_instruction. Virtual base classes within
251 * the hierarchy should not have \c visit methods.
252 */
253 /*@{*/
254 virtual void visit(ir_variable *);
255 virtual void visit(ir_loop *);
256 virtual void visit(ir_loop_jump *);
257 virtual void visit(ir_function_signature *);
258 virtual void visit(ir_function *);
259 virtual void visit(ir_expression *);
260 virtual void visit(ir_swizzle *);
261 virtual void visit(ir_dereference_variable *);
262 virtual void visit(ir_dereference_array *);
263 virtual void visit(ir_dereference_record *);
264 virtual void visit(ir_assignment *);
265 virtual void visit(ir_constant *);
266 virtual void visit(ir_call *);
267 virtual void visit(ir_return *);
268 virtual void visit(ir_discard *);
269 virtual void visit(ir_texture *);
270 virtual void visit(ir_if *);
271 /*@}*/
272
273 src_reg result;
274
275 /** List of variable_storage */
276 exec_list variables;
277
278 /** List of function_entry */
279 exec_list function_signatures;
280 int next_signature_id;
281
282 /** List of ir_to_mesa_instruction */
283 exec_list instructions;
284
285 ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op);
286
287 ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op,
288 dst_reg dst, src_reg src0);
289
290 ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op,
291 dst_reg dst, src_reg src0, src_reg src1);
292
293 ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op,
294 dst_reg dst,
295 src_reg src0, src_reg src1, src_reg src2);
296
297 /**
298 * Emit the correct dot-product instruction for the type of arguments
299 */
300 ir_to_mesa_instruction * emit_dp(ir_instruction *ir,
301 dst_reg dst,
302 src_reg src0,
303 src_reg src1,
304 unsigned elements);
305
306 void emit_scalar(ir_instruction *ir, enum prog_opcode op,
307 dst_reg dst, src_reg src0);
308
309 void emit_scalar(ir_instruction *ir, enum prog_opcode op,
310 dst_reg dst, src_reg src0, src_reg src1);
311
312 void emit_scs(ir_instruction *ir, enum prog_opcode op,
313 dst_reg dst, const src_reg &src);
314
315 GLboolean try_emit_mad(ir_expression *ir,
316 int mul_operand);
317 GLboolean try_emit_sat(ir_expression *ir);
318
319 void emit_swz(ir_expression *ir);
320
321 bool process_move_condition(ir_rvalue *ir);
322
323 void copy_propagate(void);
324
325 void *mem_ctx;
326 };
327
328 src_reg undef_src = src_reg(PROGRAM_UNDEFINED, 0, NULL);
329
330 dst_reg undef_dst = dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP);
331
332 dst_reg address_reg = dst_reg(PROGRAM_ADDRESS, WRITEMASK_X);
333
334 static int
335 swizzle_for_size(int size)
336 {
337 int size_swizzles[4] = {
338 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
339 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
340 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
341 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
342 };
343
344 assert((size >= 1) && (size <= 4));
345 return size_swizzles[size - 1];
346 }
347
348 ir_to_mesa_instruction *
349 ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
350 dst_reg dst,
351 src_reg src0, src_reg src1, src_reg src2)
352 {
353 ir_to_mesa_instruction *inst = new(mem_ctx) ir_to_mesa_instruction();
354 int num_reladdr = 0;
355
356 /* If we have to do relative addressing, we want to load the ARL
357 * reg directly for one of the regs, and preload the other reladdr
358 * sources into temps.
359 */
360 num_reladdr += dst.reladdr != NULL;
361 num_reladdr += src0.reladdr != NULL;
362 num_reladdr += src1.reladdr != NULL;
363 num_reladdr += src2.reladdr != NULL;
364
365 reladdr_to_temp(ir, &src2, &num_reladdr);
366 reladdr_to_temp(ir, &src1, &num_reladdr);
367 reladdr_to_temp(ir, &src0, &num_reladdr);
368
369 if (dst.reladdr) {
370 emit(ir, OPCODE_ARL, address_reg, *dst.reladdr);
371 num_reladdr--;
372 }
373 assert(num_reladdr == 0);
374
375 inst->op = op;
376 inst->dst = dst;
377 inst->src[0] = src0;
378 inst->src[1] = src1;
379 inst->src[2] = src2;
380 inst->ir = ir;
381
382 inst->function = NULL;
383
384 this->instructions.push_tail(inst);
385
386 return inst;
387 }
388
389
390 ir_to_mesa_instruction *
391 ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
392 dst_reg dst, src_reg src0, src_reg src1)
393 {
394 return emit(ir, op, dst, src0, src1, undef_src);
395 }
396
397 ir_to_mesa_instruction *
398 ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
399 dst_reg dst, src_reg src0)
400 {
401 assert(dst.writemask != 0);
402 return emit(ir, op, dst, src0, undef_src, undef_src);
403 }
404
405 ir_to_mesa_instruction *
406 ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op)
407 {
408 return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
409 }
410
411 ir_to_mesa_instruction *
412 ir_to_mesa_visitor::emit_dp(ir_instruction *ir,
413 dst_reg dst, src_reg src0, src_reg src1,
414 unsigned elements)
415 {
416 static const gl_inst_opcode dot_opcodes[] = {
417 OPCODE_DP2, OPCODE_DP3, OPCODE_DP4
418 };
419
420 return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
421 }
422
423 /**
424 * Emits Mesa scalar opcodes to produce unique answers across channels.
425 *
426 * Some Mesa opcodes are scalar-only, like ARB_fp/vp. The src X
427 * channel determines the result across all channels. So to do a vec4
428 * of this operation, we want to emit a scalar per source channel used
429 * to produce dest channels.
430 */
431 void
432 ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
433 dst_reg dst,
434 src_reg orig_src0, src_reg orig_src1)
435 {
436 int i, j;
437 int done_mask = ~dst.writemask;
438
439 /* Mesa RCP is a scalar operation splatting results to all channels,
440 * like ARB_fp/vp. So emit as many RCPs as necessary to cover our
441 * dst channels.
442 */
443 for (i = 0; i < 4; i++) {
444 GLuint this_mask = (1 << i);
445 ir_to_mesa_instruction *inst;
446 src_reg src0 = orig_src0;
447 src_reg src1 = orig_src1;
448
449 if (done_mask & this_mask)
450 continue;
451
452 GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
453 GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
454 for (j = i + 1; j < 4; j++) {
455 /* If there is another enabled component in the destination that is
456 * derived from the same inputs, generate its value on this pass as
457 * well.
458 */
459 if (!(done_mask & (1 << j)) &&
460 GET_SWZ(src0.swizzle, j) == src0_swiz &&
461 GET_SWZ(src1.swizzle, j) == src1_swiz) {
462 this_mask |= (1 << j);
463 }
464 }
465 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
466 src0_swiz, src0_swiz);
467 src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
468 src1_swiz, src1_swiz);
469
470 inst = emit(ir, op, dst, src0, src1);
471 inst->dst.writemask = this_mask;
472 done_mask |= this_mask;
473 }
474 }
475
476 void
477 ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
478 dst_reg dst, src_reg src0)
479 {
480 src_reg undef = undef_src;
481
482 undef.swizzle = SWIZZLE_XXXX;
483
484 emit_scalar(ir, op, dst, src0, undef);
485 }
486
487 /**
488 * Emit an OPCODE_SCS instruction
489 *
490 * The \c SCS opcode functions a bit differently than the other Mesa (or
491 * ARB_fragment_program) opcodes. Instead of splatting its result across all
492 * four components of the destination, it writes one value to the \c x
493 * component and another value to the \c y component.
494 *
495 * \param ir IR instruction being processed
496 * \param op Either \c OPCODE_SIN or \c OPCODE_COS depending on which
497 * value is desired.
498 * \param dst Destination register
499 * \param src Source register
500 */
501 void
502 ir_to_mesa_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op,
503 dst_reg dst,
504 const src_reg &src)
505 {
506 /* Vertex programs cannot use the SCS opcode.
507 */
508 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) {
509 emit_scalar(ir, op, dst, src);
510 return;
511 }
512
513 const unsigned component = (op == OPCODE_SIN) ? 0 : 1;
514 const unsigned scs_mask = (1U << component);
515 int done_mask = ~dst.writemask;
516 src_reg tmp;
517
518 assert(op == OPCODE_SIN || op == OPCODE_COS);
519
520 /* If there are compnents in the destination that differ from the component
521 * that will be written by the SCS instrution, we'll need a temporary.
522 */
523 if (scs_mask != unsigned(dst.writemask)) {
524 tmp = get_temp(glsl_type::vec4_type);
525 }
526
527 for (unsigned i = 0; i < 4; i++) {
528 unsigned this_mask = (1U << i);
529 src_reg src0 = src;
530
531 if ((done_mask & this_mask) != 0)
532 continue;
533
534 /* The source swizzle specified which component of the source generates
535 * sine / cosine for the current component in the destination. The SCS
536 * instruction requires that this value be swizzle to the X component.
537 * Replace the current swizzle with a swizzle that puts the source in
538 * the X component.
539 */
540 unsigned src0_swiz = GET_SWZ(src.swizzle, i);
541
542 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
543 src0_swiz, src0_swiz);
544 for (unsigned j = i + 1; j < 4; j++) {
545 /* If there is another enabled component in the destination that is
546 * derived from the same inputs, generate its value on this pass as
547 * well.
548 */
549 if (!(done_mask & (1 << j)) &&
550 GET_SWZ(src0.swizzle, j) == src0_swiz) {
551 this_mask |= (1 << j);
552 }
553 }
554
555 if (this_mask != scs_mask) {
556 ir_to_mesa_instruction *inst;
557 dst_reg tmp_dst = dst_reg(tmp);
558
559 /* Emit the SCS instruction.
560 */
561 inst = emit(ir, OPCODE_SCS, tmp_dst, src0);
562 inst->dst.writemask = scs_mask;
563
564 /* Move the result of the SCS instruction to the desired location in
565 * the destination.
566 */
567 tmp.swizzle = MAKE_SWIZZLE4(component, component,
568 component, component);
569 inst = emit(ir, OPCODE_SCS, dst, tmp);
570 inst->dst.writemask = this_mask;
571 } else {
572 /* Emit the SCS instruction to write directly to the destination.
573 */
574 ir_to_mesa_instruction *inst = emit(ir, OPCODE_SCS, dst, src0);
575 inst->dst.writemask = scs_mask;
576 }
577
578 done_mask |= this_mask;
579 }
580 }
581
582 struct src_reg
583 ir_to_mesa_visitor::src_reg_for_float(float val)
584 {
585 src_reg src(PROGRAM_CONSTANT, -1, NULL);
586
587 src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
588 (const gl_constant_value *)&val, 1, &src.swizzle);
589
590 return src;
591 }
592
593 static int
594 type_size(const struct glsl_type *type)
595 {
596 unsigned int i;
597 int size;
598
599 switch (type->base_type) {
600 case GLSL_TYPE_UINT:
601 case GLSL_TYPE_INT:
602 case GLSL_TYPE_FLOAT:
603 case GLSL_TYPE_BOOL:
604 if (type->is_matrix()) {
605 return type->matrix_columns;
606 } else {
607 /* Regardless of size of vector, it gets a vec4. This is bad
608 * packing for things like floats, but otherwise arrays become a
609 * mess. Hopefully a later pass over the code can pack scalars
610 * down if appropriate.
611 */
612 return 1;
613 }
614 case GLSL_TYPE_ARRAY:
615 assert(type->length > 0);
616 return type_size(type->fields.array) * type->length;
617 case GLSL_TYPE_STRUCT:
618 size = 0;
619 for (i = 0; i < type->length; i++) {
620 size += type_size(type->fields.structure[i].type);
621 }
622 return size;
623 case GLSL_TYPE_SAMPLER:
624 /* Samplers take up one slot in UNIFORMS[], but they're baked in
625 * at link time.
626 */
627 return 1;
628 default:
629 assert(0);
630 return 0;
631 }
632 }
633
634 /**
635 * In the initial pass of codegen, we assign temporary numbers to
636 * intermediate results. (not SSA -- variable assignments will reuse
637 * storage). Actual register allocation for the Mesa VM occurs in a
638 * pass over the Mesa IR later.
639 */
640 src_reg
641 ir_to_mesa_visitor::get_temp(const glsl_type *type)
642 {
643 src_reg src;
644
645 src.file = PROGRAM_TEMPORARY;
646 src.index = next_temp;
647 src.reladdr = NULL;
648 next_temp += type_size(type);
649
650 if (type->is_array() || type->is_record()) {
651 src.swizzle = SWIZZLE_NOOP;
652 } else {
653 src.swizzle = swizzle_for_size(type->vector_elements);
654 }
655 src.negate = 0;
656
657 return src;
658 }
659
660 variable_storage *
661 ir_to_mesa_visitor::find_variable_storage(ir_variable *var)
662 {
663
664 variable_storage *entry;
665
666 foreach_iter(exec_list_iterator, iter, this->variables) {
667 entry = (variable_storage *)iter.get();
668
669 if (entry->var == var)
670 return entry;
671 }
672
673 return NULL;
674 }
675
676 void
677 ir_to_mesa_visitor::visit(ir_variable *ir)
678 {
679 if (strcmp(ir->name, "gl_FragCoord") == 0) {
680 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
681
682 fp->OriginUpperLeft = ir->origin_upper_left;
683 fp->PixelCenterInteger = ir->pixel_center_integer;
684
685 } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
686 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
687 switch (ir->depth_layout) {
688 case ir_depth_layout_none:
689 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE;
690 break;
691 case ir_depth_layout_any:
692 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY;
693 break;
694 case ir_depth_layout_greater:
695 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER;
696 break;
697 case ir_depth_layout_less:
698 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS;
699 break;
700 case ir_depth_layout_unchanged:
701 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED;
702 break;
703 default:
704 assert(0);
705 break;
706 }
707 }
708
709 if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
710 unsigned int i;
711 const ir_state_slot *const slots = ir->state_slots;
712 assert(ir->state_slots != NULL);
713
714 /* Check if this statevar's setup in the STATE file exactly
715 * matches how we'll want to reference it as a
716 * struct/array/whatever. If not, then we need to move it into
717 * temporary storage and hope that it'll get copy-propagated
718 * out.
719 */
720 for (i = 0; i < ir->num_state_slots; i++) {
721 if (slots[i].swizzle != SWIZZLE_XYZW) {
722 break;
723 }
724 }
725
726 struct variable_storage *storage;
727 dst_reg dst;
728 if (i == ir->num_state_slots) {
729 /* We'll set the index later. */
730 storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
731 this->variables.push_tail(storage);
732
733 dst = undef_dst;
734 } else {
735 /* The variable_storage constructor allocates slots based on the size
736 * of the type. However, this had better match the number of state
737 * elements that we're going to copy into the new temporary.
738 */
739 assert((int) ir->num_state_slots == type_size(ir->type));
740
741 storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
742 this->next_temp);
743 this->variables.push_tail(storage);
744 this->next_temp += type_size(ir->type);
745
746 dst = dst_reg(src_reg(PROGRAM_TEMPORARY, storage->index, NULL));
747 }
748
749
750 for (unsigned int i = 0; i < ir->num_state_slots; i++) {
751 int index = _mesa_add_state_reference(this->prog->Parameters,
752 (gl_state_index *)slots[i].tokens);
753
754 if (storage->file == PROGRAM_STATE_VAR) {
755 if (storage->index == -1) {
756 storage->index = index;
757 } else {
758 assert(index == storage->index + (int)i);
759 }
760 } else {
761 src_reg src(PROGRAM_STATE_VAR, index, NULL);
762 src.swizzle = slots[i].swizzle;
763 emit(ir, OPCODE_MOV, dst, src);
764 /* even a float takes up a whole vec4 reg in a struct/array. */
765 dst.index++;
766 }
767 }
768
769 if (storage->file == PROGRAM_TEMPORARY &&
770 dst.index != storage->index + (int) ir->num_state_slots) {
771 linker_error(this->shader_program,
772 "failed to load builtin uniform `%s' "
773 "(%d/%d regs loaded)\n",
774 ir->name, dst.index - storage->index,
775 type_size(ir->type));
776 }
777 }
778 }
779
780 void
781 ir_to_mesa_visitor::visit(ir_loop *ir)
782 {
783 ir_dereference_variable *counter = NULL;
784
785 if (ir->counter != NULL)
786 counter = new(mem_ctx) ir_dereference_variable(ir->counter);
787
788 if (ir->from != NULL) {
789 assert(ir->counter != NULL);
790
791 ir_assignment *a =
792 new(mem_ctx) ir_assignment(counter, ir->from, NULL);
793
794 a->accept(this);
795 }
796
797 emit(NULL, OPCODE_BGNLOOP);
798
799 if (ir->to) {
800 ir_expression *e =
801 new(mem_ctx) ir_expression(ir->cmp, glsl_type::bool_type,
802 counter, ir->to);
803 ir_if *if_stmt = new(mem_ctx) ir_if(e);
804
805 ir_loop_jump *brk =
806 new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_break);
807
808 if_stmt->then_instructions.push_tail(brk);
809
810 if_stmt->accept(this);
811 }
812
813 visit_exec_list(&ir->body_instructions, this);
814
815 if (ir->increment) {
816 ir_expression *e =
817 new(mem_ctx) ir_expression(ir_binop_add, counter->type,
818 counter, ir->increment);
819
820 ir_assignment *a =
821 new(mem_ctx) ir_assignment(counter, e, NULL);
822
823 a->accept(this);
824 }
825
826 emit(NULL, OPCODE_ENDLOOP);
827 }
828
829 void
830 ir_to_mesa_visitor::visit(ir_loop_jump *ir)
831 {
832 switch (ir->mode) {
833 case ir_loop_jump::jump_break:
834 emit(NULL, OPCODE_BRK);
835 break;
836 case ir_loop_jump::jump_continue:
837 emit(NULL, OPCODE_CONT);
838 break;
839 }
840 }
841
842
843 void
844 ir_to_mesa_visitor::visit(ir_function_signature *ir)
845 {
846 assert(0);
847 (void)ir;
848 }
849
850 void
851 ir_to_mesa_visitor::visit(ir_function *ir)
852 {
853 /* Ignore function bodies other than main() -- we shouldn't see calls to
854 * them since they should all be inlined before we get to ir_to_mesa.
855 */
856 if (strcmp(ir->name, "main") == 0) {
857 const ir_function_signature *sig;
858 exec_list empty;
859
860 sig = ir->matching_signature(&empty);
861
862 assert(sig);
863
864 foreach_iter(exec_list_iterator, iter, sig->body) {
865 ir_instruction *ir = (ir_instruction *)iter.get();
866
867 ir->accept(this);
868 }
869 }
870 }
871
872 GLboolean
873 ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
874 {
875 int nonmul_operand = 1 - mul_operand;
876 src_reg a, b, c;
877
878 ir_expression *expr = ir->operands[mul_operand]->as_expression();
879 if (!expr || expr->operation != ir_binop_mul)
880 return false;
881
882 expr->operands[0]->accept(this);
883 a = this->result;
884 expr->operands[1]->accept(this);
885 b = this->result;
886 ir->operands[nonmul_operand]->accept(this);
887 c = this->result;
888
889 this->result = get_temp(ir->type);
890 emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, c);
891
892 return true;
893 }
894
895 GLboolean
896 ir_to_mesa_visitor::try_emit_sat(ir_expression *ir)
897 {
898 /* Saturates were only introduced to vertex programs in
899 * NV_vertex_program3, so don't give them to drivers in the VP.
900 */
901 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB)
902 return false;
903
904 ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
905 if (!sat_src)
906 return false;
907
908 sat_src->accept(this);
909 src_reg src = this->result;
910
911 /* If we generated an expression instruction into a temporary in
912 * processing the saturate's operand, apply the saturate to that
913 * instruction. Otherwise, generate a MOV to do the saturate.
914 *
915 * Note that we have to be careful to only do this optimization if
916 * the instruction in question was what generated src->result. For
917 * example, ir_dereference_array might generate a MUL instruction
918 * to create the reladdr, and return us a src reg using that
919 * reladdr. That MUL result is not the value we're trying to
920 * saturate.
921 */
922 ir_expression *sat_src_expr = sat_src->as_expression();
923 ir_to_mesa_instruction *new_inst;
924 new_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
925 if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul ||
926 sat_src_expr->operation == ir_binop_add ||
927 sat_src_expr->operation == ir_binop_dot)) {
928 new_inst->saturate = true;
929 } else {
930 this->result = get_temp(ir->type);
931 ir_to_mesa_instruction *inst;
932 inst = emit(ir, OPCODE_MOV, dst_reg(this->result), src);
933 inst->saturate = true;
934 }
935
936 return true;
937 }
938
939 void
940 ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir,
941 src_reg *reg, int *num_reladdr)
942 {
943 if (!reg->reladdr)
944 return;
945
946 emit(ir, OPCODE_ARL, address_reg, *reg->reladdr);
947
948 if (*num_reladdr != 1) {
949 src_reg temp = get_temp(glsl_type::vec4_type);
950
951 emit(ir, OPCODE_MOV, dst_reg(temp), *reg);
952 *reg = temp;
953 }
954
955 (*num_reladdr)--;
956 }
957
958 void
959 ir_to_mesa_visitor::emit_swz(ir_expression *ir)
960 {
961 /* Assume that the vector operator is in a form compatible with OPCODE_SWZ.
962 * This means that each of the operands is either an immediate value of -1,
963 * 0, or 1, or is a component from one source register (possibly with
964 * negation).
965 */
966 uint8_t components[4] = { 0 };
967 bool negate[4] = { false };
968 ir_variable *var = NULL;
969
970 for (unsigned i = 0; i < ir->type->vector_elements; i++) {
971 ir_rvalue *op = ir->operands[i];
972
973 assert(op->type->is_scalar());
974
975 while (op != NULL) {
976 switch (op->ir_type) {
977 case ir_type_constant: {
978
979 assert(op->type->is_scalar());
980
981 const ir_constant *const c = op->as_constant();
982 if (c->is_one()) {
983 components[i] = SWIZZLE_ONE;
984 } else if (c->is_zero()) {
985 components[i] = SWIZZLE_ZERO;
986 } else if (c->is_negative_one()) {
987 components[i] = SWIZZLE_ONE;
988 negate[i] = true;
989 } else {
990 assert(!"SWZ constant must be 0.0 or 1.0.");
991 }
992
993 op = NULL;
994 break;
995 }
996
997 case ir_type_dereference_variable: {
998 ir_dereference_variable *const deref =
999 (ir_dereference_variable *) op;
1000
1001 assert((var == NULL) || (deref->var == var));
1002 components[i] = SWIZZLE_X;
1003 var = deref->var;
1004 op = NULL;
1005 break;
1006 }
1007
1008 case ir_type_expression: {
1009 ir_expression *const expr = (ir_expression *) op;
1010
1011 assert(expr->operation == ir_unop_neg);
1012 negate[i] = true;
1013
1014 op = expr->operands[0];
1015 break;
1016 }
1017
1018 case ir_type_swizzle: {
1019 ir_swizzle *const swiz = (ir_swizzle *) op;
1020
1021 components[i] = swiz->mask.x;
1022 op = swiz->val;
1023 break;
1024 }
1025
1026 default:
1027 assert(!"Should not get here.");
1028 return;
1029 }
1030 }
1031 }
1032
1033 assert(var != NULL);
1034
1035 ir_dereference_variable *const deref =
1036 new(mem_ctx) ir_dereference_variable(var);
1037
1038 this->result.file = PROGRAM_UNDEFINED;
1039 deref->accept(this);
1040 if (this->result.file == PROGRAM_UNDEFINED) {
1041 ir_print_visitor v;
1042 printf("Failed to get tree for expression operand:\n");
1043 deref->accept(&v);
1044 exit(1);
1045 }
1046
1047 src_reg src;
1048
1049 src = this->result;
1050 src.swizzle = MAKE_SWIZZLE4(components[0],
1051 components[1],
1052 components[2],
1053 components[3]);
1054 src.negate = ((unsigned(negate[0]) << 0)
1055 | (unsigned(negate[1]) << 1)
1056 | (unsigned(negate[2]) << 2)
1057 | (unsigned(negate[3]) << 3));
1058
1059 /* Storage for our result. Ideally for an assignment we'd be using the
1060 * actual storage for the result here, instead.
1061 */
1062 const src_reg result_src = get_temp(ir->type);
1063 dst_reg result_dst = dst_reg(result_src);
1064
1065 /* Limit writes to the channels that will be used by result_src later.
1066 * This does limit this temp's use as a temporary for multi-instruction
1067 * sequences.
1068 */
1069 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1070
1071 emit(ir, OPCODE_SWZ, result_dst, src);
1072 this->result = result_src;
1073 }
1074
1075 void
1076 ir_to_mesa_visitor::visit(ir_expression *ir)
1077 {
1078 unsigned int operand;
1079 src_reg op[Elements(ir->operands)];
1080 src_reg result_src;
1081 dst_reg result_dst;
1082
1083 /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c)
1084 */
1085 if (ir->operation == ir_binop_add) {
1086 if (try_emit_mad(ir, 1))
1087 return;
1088 if (try_emit_mad(ir, 0))
1089 return;
1090 }
1091 if (try_emit_sat(ir))
1092 return;
1093
1094 if (ir->operation == ir_quadop_vector) {
1095 this->emit_swz(ir);
1096 return;
1097 }
1098
1099 for (operand = 0; operand < ir->get_num_operands(); operand++) {
1100 this->result.file = PROGRAM_UNDEFINED;
1101 ir->operands[operand]->accept(this);
1102 if (this->result.file == PROGRAM_UNDEFINED) {
1103 ir_print_visitor v;
1104 printf("Failed to get tree for expression operand:\n");
1105 ir->operands[operand]->accept(&v);
1106 exit(1);
1107 }
1108 op[operand] = this->result;
1109
1110 /* Matrix expression operands should have been broken down to vector
1111 * operations already.
1112 */
1113 assert(!ir->operands[operand]->type->is_matrix());
1114 }
1115
1116 int vector_elements = ir->operands[0]->type->vector_elements;
1117 if (ir->operands[1]) {
1118 vector_elements = MAX2(vector_elements,
1119 ir->operands[1]->type->vector_elements);
1120 }
1121
1122 this->result.file = PROGRAM_UNDEFINED;
1123
1124 /* Storage for our result. Ideally for an assignment we'd be using
1125 * the actual storage for the result here, instead.
1126 */
1127 result_src = get_temp(ir->type);
1128 /* convenience for the emit functions below. */
1129 result_dst = dst_reg(result_src);
1130 /* Limit writes to the channels that will be used by result_src later.
1131 * This does limit this temp's use as a temporary for multi-instruction
1132 * sequences.
1133 */
1134 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1135
1136 switch (ir->operation) {
1137 case ir_unop_logic_not:
1138 /* Previously 'SEQ dst, src, 0.0' was used for this. However, many
1139 * older GPUs implement SEQ using multiple instructions (i915 uses two
1140 * SGE instructions and a MUL instruction). Since our logic values are
1141 * 0.0 and 1.0, 1-x also implements !x.
1142 */
1143 op[0].negate = ~op[0].negate;
1144 emit(ir, OPCODE_ADD, result_dst, op[0], src_reg_for_float(1.0));
1145 break;
1146 case ir_unop_neg:
1147 op[0].negate = ~op[0].negate;
1148 result_src = op[0];
1149 break;
1150 case ir_unop_abs:
1151 emit(ir, OPCODE_ABS, result_dst, op[0]);
1152 break;
1153 case ir_unop_sign:
1154 emit(ir, OPCODE_SSG, result_dst, op[0]);
1155 break;
1156 case ir_unop_rcp:
1157 emit_scalar(ir, OPCODE_RCP, result_dst, op[0]);
1158 break;
1159
1160 case ir_unop_exp2:
1161 emit_scalar(ir, OPCODE_EX2, result_dst, op[0]);
1162 break;
1163 case ir_unop_exp:
1164 case ir_unop_log:
1165 assert(!"not reached: should be handled by ir_explog_to_explog2");
1166 break;
1167 case ir_unop_log2:
1168 emit_scalar(ir, OPCODE_LG2, result_dst, op[0]);
1169 break;
1170 case ir_unop_sin:
1171 emit_scalar(ir, OPCODE_SIN, result_dst, op[0]);
1172 break;
1173 case ir_unop_cos:
1174 emit_scalar(ir, OPCODE_COS, result_dst, op[0]);
1175 break;
1176 case ir_unop_sin_reduced:
1177 emit_scs(ir, OPCODE_SIN, result_dst, op[0]);
1178 break;
1179 case ir_unop_cos_reduced:
1180 emit_scs(ir, OPCODE_COS, result_dst, op[0]);
1181 break;
1182
1183 case ir_unop_dFdx:
1184 emit(ir, OPCODE_DDX, result_dst, op[0]);
1185 break;
1186 case ir_unop_dFdy:
1187 emit(ir, OPCODE_DDY, result_dst, op[0]);
1188 break;
1189
1190 case ir_unop_noise: {
1191 const enum prog_opcode opcode =
1192 prog_opcode(OPCODE_NOISE1
1193 + (ir->operands[0]->type->vector_elements) - 1);
1194 assert((opcode >= OPCODE_NOISE1) && (opcode <= OPCODE_NOISE4));
1195
1196 emit(ir, opcode, result_dst, op[0]);
1197 break;
1198 }
1199
1200 case ir_binop_add:
1201 emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
1202 break;
1203 case ir_binop_sub:
1204 emit(ir, OPCODE_SUB, result_dst, op[0], op[1]);
1205 break;
1206
1207 case ir_binop_mul:
1208 emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
1209 break;
1210 case ir_binop_div:
1211 assert(!"not reached: should be handled by ir_div_to_mul_rcp");
1212 case ir_binop_mod:
1213 assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
1214 break;
1215
1216 case ir_binop_less:
1217 emit(ir, OPCODE_SLT, result_dst, op[0], op[1]);
1218 break;
1219 case ir_binop_greater:
1220 emit(ir, OPCODE_SGT, result_dst, op[0], op[1]);
1221 break;
1222 case ir_binop_lequal:
1223 emit(ir, OPCODE_SLE, result_dst, op[0], op[1]);
1224 break;
1225 case ir_binop_gequal:
1226 emit(ir, OPCODE_SGE, result_dst, op[0], op[1]);
1227 break;
1228 case ir_binop_equal:
1229 emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
1230 break;
1231 case ir_binop_nequal:
1232 emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
1233 break;
1234 case ir_binop_all_equal:
1235 /* "==" operator producing a scalar boolean. */
1236 if (ir->operands[0]->type->is_vector() ||
1237 ir->operands[1]->type->is_vector()) {
1238 src_reg temp = get_temp(glsl_type::vec4_type);
1239 emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);
1240 emit_dp(ir, result_dst, temp, temp, vector_elements);
1241 emit(ir, OPCODE_SEQ, result_dst, result_src, src_reg_for_float(0.0));
1242 } else {
1243 emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
1244 }
1245 break;
1246 case ir_binop_any_nequal:
1247 /* "!=" operator producing a scalar boolean. */
1248 if (ir->operands[0]->type->is_vector() ||
1249 ir->operands[1]->type->is_vector()) {
1250 src_reg temp = get_temp(glsl_type::vec4_type);
1251 emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);
1252
1253 /* After the dot-product, the value will be an integer on the
1254 * range [0,4]. Zero stays zero, and positive values become 1.0.
1255 */
1256 ir_to_mesa_instruction *const dp =
1257 emit_dp(ir, result_dst, temp, temp, vector_elements);
1258 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
1259 /* The clamping to [0,1] can be done for free in the fragment
1260 * shader with a saturate.
1261 */
1262 dp->saturate = true;
1263 } else {
1264 /* Negating the result of the dot-product gives values on the range
1265 * [-4, 0]. Zero stays zero, and negative values become 1.0. This
1266 * achieved using SLT.
1267 */
1268 src_reg slt_src = result_src;
1269 slt_src.negate = ~slt_src.negate;
1270 emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
1271 }
1272 } else {
1273 emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
1274 }
1275 break;
1276
1277 case ir_unop_any: {
1278 assert(ir->operands[0]->type->is_vector());
1279
1280 /* After the dot-product, the value will be an integer on the
1281 * range [0,4]. Zero stays zero, and positive values become 1.0.
1282 */
1283 ir_to_mesa_instruction *const dp =
1284 emit_dp(ir, result_dst, op[0], op[0],
1285 ir->operands[0]->type->vector_elements);
1286 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
1287 /* The clamping to [0,1] can be done for free in the fragment
1288 * shader with a saturate.
1289 */
1290 dp->saturate = true;
1291 } else {
1292 /* Negating the result of the dot-product gives values on the range
1293 * [-4, 0]. Zero stays zero, and negative values become 1.0. This
1294 * is achieved using SLT.
1295 */
1296 src_reg slt_src = result_src;
1297 slt_src.negate = ~slt_src.negate;
1298 emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
1299 }
1300 break;
1301 }
1302
1303 case ir_binop_logic_xor:
1304 emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
1305 break;
1306
1307 case ir_binop_logic_or: {
1308 /* After the addition, the value will be an integer on the
1309 * range [0,2]. Zero stays zero, and positive values become 1.0.
1310 */
1311 ir_to_mesa_instruction *add =
1312 emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
1313 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
1314 /* The clamping to [0,1] can be done for free in the fragment
1315 * shader with a saturate.
1316 */
1317 add->saturate = true;
1318 } else {
1319 /* Negating the result of the addition gives values on the range
1320 * [-2, 0]. Zero stays zero, and negative values become 1.0. This
1321 * is achieved using SLT.
1322 */
1323 src_reg slt_src = result_src;
1324 slt_src.negate = ~slt_src.negate;
1325 emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
1326 }
1327 break;
1328 }
1329
1330 case ir_binop_logic_and:
1331 /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
1332 emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
1333 break;
1334
1335 case ir_binop_dot:
1336 assert(ir->operands[0]->type->is_vector());
1337 assert(ir->operands[0]->type == ir->operands[1]->type);
1338 emit_dp(ir, result_dst, op[0], op[1],
1339 ir->operands[0]->type->vector_elements);
1340 break;
1341
1342 case ir_unop_sqrt:
1343 /* sqrt(x) = x * rsq(x). */
1344 emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]);
1345 emit(ir, OPCODE_MUL, result_dst, result_src, op[0]);
1346 /* For incoming channels <= 0, set the result to 0. */
1347 op[0].negate = ~op[0].negate;
1348 emit(ir, OPCODE_CMP, result_dst,
1349 op[0], result_src, src_reg_for_float(0.0));
1350 break;
1351 case ir_unop_rsq:
1352 emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]);
1353 break;
1354 case ir_unop_i2f:
1355 case ir_unop_u2f:
1356 case ir_unop_b2f:
1357 case ir_unop_b2i:
1358 case ir_unop_i2u:
1359 case ir_unop_u2i:
1360 /* Mesa IR lacks types, ints are stored as truncated floats. */
1361 result_src = op[0];
1362 break;
1363 case ir_unop_f2i:
1364 emit(ir, OPCODE_TRUNC, result_dst, op[0]);
1365 break;
1366 case ir_unop_f2b:
1367 case ir_unop_i2b:
1368 emit(ir, OPCODE_SNE, result_dst,
1369 op[0], src_reg_for_float(0.0));
1370 break;
1371 case ir_unop_trunc:
1372 emit(ir, OPCODE_TRUNC, result_dst, op[0]);
1373 break;
1374 case ir_unop_ceil:
1375 op[0].negate = ~op[0].negate;
1376 emit(ir, OPCODE_FLR, result_dst, op[0]);
1377 result_src.negate = ~result_src.negate;
1378 break;
1379 case ir_unop_floor:
1380 emit(ir, OPCODE_FLR, result_dst, op[0]);
1381 break;
1382 case ir_unop_fract:
1383 emit(ir, OPCODE_FRC, result_dst, op[0]);
1384 break;
1385
1386 case ir_binop_min:
1387 emit(ir, OPCODE_MIN, result_dst, op[0], op[1]);
1388 break;
1389 case ir_binop_max:
1390 emit(ir, OPCODE_MAX, result_dst, op[0], op[1]);
1391 break;
1392 case ir_binop_pow:
1393 emit_scalar(ir, OPCODE_POW, result_dst, op[0], op[1]);
1394 break;
1395
1396 case ir_unop_bit_not:
1397 case ir_binop_lshift:
1398 case ir_binop_rshift:
1399 case ir_binop_bit_and:
1400 case ir_binop_bit_xor:
1401 case ir_binop_bit_or:
1402 case ir_unop_round_even:
1403 assert(!"GLSL 1.30 features unsupported");
1404 break;
1405
1406 case ir_quadop_vector:
1407 /* This operation should have already been handled.
1408 */
1409 assert(!"Should not get here.");
1410 break;
1411 }
1412
1413 this->result = result_src;
1414 }
1415
1416
1417 void
1418 ir_to_mesa_visitor::visit(ir_swizzle *ir)
1419 {
1420 src_reg src;
1421 int i;
1422 int swizzle[4];
1423
1424 /* Note that this is only swizzles in expressions, not those on the left
1425 * hand side of an assignment, which do write masking. See ir_assignment
1426 * for that.
1427 */
1428
1429 ir->val->accept(this);
1430 src = this->result;
1431 assert(src.file != PROGRAM_UNDEFINED);
1432
1433 for (i = 0; i < 4; i++) {
1434 if (i < ir->type->vector_elements) {
1435 switch (i) {
1436 case 0:
1437 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
1438 break;
1439 case 1:
1440 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
1441 break;
1442 case 2:
1443 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
1444 break;
1445 case 3:
1446 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
1447 break;
1448 }
1449 } else {
1450 /* If the type is smaller than a vec4, replicate the last
1451 * channel out.
1452 */
1453 swizzle[i] = swizzle[ir->type->vector_elements - 1];
1454 }
1455 }
1456
1457 src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1458
1459 this->result = src;
1460 }
1461
1462 void
1463 ir_to_mesa_visitor::visit(ir_dereference_variable *ir)
1464 {
1465 variable_storage *entry = find_variable_storage(ir->var);
1466 ir_variable *var = ir->var;
1467
1468 if (!entry) {
1469 switch (var->mode) {
1470 case ir_var_uniform:
1471 entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
1472 var->location);
1473 this->variables.push_tail(entry);
1474 break;
1475 case ir_var_in:
1476 case ir_var_inout:
1477 /* The linker assigns locations for varyings and attributes,
1478 * including deprecated builtins (like gl_Color),
1479 * user-assigned generic attributes (glBindVertexLocation),
1480 * and user-defined varyings.
1481 *
1482 * FINISHME: We would hit this path for function arguments. Fix!
1483 */
1484 assert(var->location != -1);
1485 entry = new(mem_ctx) variable_storage(var,
1486 PROGRAM_INPUT,
1487 var->location);
1488 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
1489 var->location >= VERT_ATTRIB_GENERIC0) {
1490 _mesa_add_attribute(this->prog->Attributes,
1491 var->name,
1492 _mesa_sizeof_glsl_type(var->type->gl_type),
1493 var->type->gl_type,
1494 var->location - VERT_ATTRIB_GENERIC0);
1495 }
1496 break;
1497 case ir_var_out:
1498 assert(var->location != -1);
1499 entry = new(mem_ctx) variable_storage(var,
1500 PROGRAM_OUTPUT,
1501 var->location);
1502 break;
1503 case ir_var_system_value:
1504 entry = new(mem_ctx) variable_storage(var,
1505 PROGRAM_SYSTEM_VALUE,
1506 var->location);
1507 break;
1508 case ir_var_auto:
1509 case ir_var_temporary:
1510 entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY,
1511 this->next_temp);
1512 this->variables.push_tail(entry);
1513
1514 next_temp += type_size(var->type);
1515 break;
1516 }
1517
1518 if (!entry) {
1519 printf("Failed to make storage for %s\n", var->name);
1520 exit(1);
1521 }
1522 }
1523
1524 this->result = src_reg(entry->file, entry->index, var->type);
1525 }
1526
1527 void
1528 ir_to_mesa_visitor::visit(ir_dereference_array *ir)
1529 {
1530 ir_constant *index;
1531 src_reg src;
1532 int element_size = type_size(ir->type);
1533
1534 index = ir->array_index->constant_expression_value();
1535
1536 ir->array->accept(this);
1537 src = this->result;
1538
1539 if (index) {
1540 src.index += index->value.i[0] * element_size;
1541 } else {
1542 /* Variable index array dereference. It eats the "vec4" of the
1543 * base of the array and an index that offsets the Mesa register
1544 * index.
1545 */
1546 ir->array_index->accept(this);
1547
1548 src_reg index_reg;
1549
1550 if (element_size == 1) {
1551 index_reg = this->result;
1552 } else {
1553 index_reg = get_temp(glsl_type::float_type);
1554
1555 emit(ir, OPCODE_MUL, dst_reg(index_reg),
1556 this->result, src_reg_for_float(element_size));
1557 }
1558
1559 /* If there was already a relative address register involved, add the
1560 * new and the old together to get the new offset.
1561 */
1562 if (src.reladdr != NULL) {
1563 src_reg accum_reg = get_temp(glsl_type::float_type);
1564
1565 emit(ir, OPCODE_ADD, dst_reg(accum_reg),
1566 index_reg, *src.reladdr);
1567
1568 index_reg = accum_reg;
1569 }
1570
1571 src.reladdr = ralloc(mem_ctx, src_reg);
1572 memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1573 }
1574
1575 /* If the type is smaller than a vec4, replicate the last channel out. */
1576 if (ir->type->is_scalar() || ir->type->is_vector())
1577 src.swizzle = swizzle_for_size(ir->type->vector_elements);
1578 else
1579 src.swizzle = SWIZZLE_NOOP;
1580
1581 this->result = src;
1582 }
1583
1584 void
1585 ir_to_mesa_visitor::visit(ir_dereference_record *ir)
1586 {
1587 unsigned int i;
1588 const glsl_type *struct_type = ir->record->type;
1589 int offset = 0;
1590
1591 ir->record->accept(this);
1592
1593 for (i = 0; i < struct_type->length; i++) {
1594 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1595 break;
1596 offset += type_size(struct_type->fields.structure[i].type);
1597 }
1598
1599 /* If the type is smaller than a vec4, replicate the last channel out. */
1600 if (ir->type->is_scalar() || ir->type->is_vector())
1601 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1602 else
1603 this->result.swizzle = SWIZZLE_NOOP;
1604
1605 this->result.index += offset;
1606 }
1607
1608 /**
1609 * We want to be careful in assignment setup to hit the actual storage
1610 * instead of potentially using a temporary like we might with the
1611 * ir_dereference handler.
1612 */
1613 static dst_reg
1614 get_assignment_lhs(ir_dereference *ir, ir_to_mesa_visitor *v)
1615 {
1616 /* The LHS must be a dereference. If the LHS is a variable indexed array
1617 * access of a vector, it must be separated into a series conditional moves
1618 * before reaching this point (see ir_vec_index_to_cond_assign).
1619 */
1620 assert(ir->as_dereference());
1621 ir_dereference_array *deref_array = ir->as_dereference_array();
1622 if (deref_array) {
1623 assert(!deref_array->array->type->is_vector());
1624 }
1625
1626 /* Use the rvalue deref handler for the most part. We'll ignore
1627 * swizzles in it and write swizzles using writemask, though.
1628 */
1629 ir->accept(v);
1630 return dst_reg(v->result);
1631 }
1632
1633 /**
1634 * Process the condition of a conditional assignment
1635 *
1636 * Examines the condition of a conditional assignment to generate the optimal
1637 * first operand of a \c CMP instruction. If the condition is a relational
1638 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
1639 * used as the source for the \c CMP instruction. Otherwise the comparison
1640 * is processed to a boolean result, and the boolean result is used as the
1641 * operand to the CMP instruction.
1642 */
1643 bool
1644 ir_to_mesa_visitor::process_move_condition(ir_rvalue *ir)
1645 {
1646 ir_rvalue *src_ir = ir;
1647 bool negate = true;
1648 bool switch_order = false;
1649
1650 ir_expression *const expr = ir->as_expression();
1651 if ((expr != NULL) && (expr->get_num_operands() == 2)) {
1652 bool zero_on_left = false;
1653
1654 if (expr->operands[0]->is_zero()) {
1655 src_ir = expr->operands[1];
1656 zero_on_left = true;
1657 } else if (expr->operands[1]->is_zero()) {
1658 src_ir = expr->operands[0];
1659 zero_on_left = false;
1660 }
1661
1662 /* a is - 0 + - 0 +
1663 * (a < 0) T F F ( a < 0) T F F
1664 * (0 < a) F F T (-a < 0) F F T
1665 * (a <= 0) T T F (-a < 0) F F T (swap order of other operands)
1666 * (0 <= a) F T T ( a < 0) T F F (swap order of other operands)
1667 * (a > 0) F F T (-a < 0) F F T
1668 * (0 > a) T F F ( a < 0) T F F
1669 * (a >= 0) F T T ( a < 0) T F F (swap order of other operands)
1670 * (0 >= a) T T F (-a < 0) F F T (swap order of other operands)
1671 *
1672 * Note that exchanging the order of 0 and 'a' in the comparison simply
1673 * means that the value of 'a' should be negated.
1674 */
1675 if (src_ir != ir) {
1676 switch (expr->operation) {
1677 case ir_binop_less:
1678 switch_order = false;
1679 negate = zero_on_left;
1680 break;
1681
1682 case ir_binop_greater:
1683 switch_order = false;
1684 negate = !zero_on_left;
1685 break;
1686
1687 case ir_binop_lequal:
1688 switch_order = true;
1689 negate = !zero_on_left;
1690 break;
1691
1692 case ir_binop_gequal:
1693 switch_order = true;
1694 negate = zero_on_left;
1695 break;
1696
1697 default:
1698 /* This isn't the right kind of comparison afterall, so make sure
1699 * the whole condition is visited.
1700 */
1701 src_ir = ir;
1702 break;
1703 }
1704 }
1705 }
1706
1707 src_ir->accept(this);
1708
1709 /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
1710 * condition we produced is 0.0 or 1.0. By flipping the sign, we can
1711 * choose which value OPCODE_CMP produces without an extra instruction
1712 * computing the condition.
1713 */
1714 if (negate)
1715 this->result.negate = ~this->result.negate;
1716
1717 return switch_order;
1718 }
1719
1720 void
1721 ir_to_mesa_visitor::visit(ir_assignment *ir)
1722 {
1723 dst_reg l;
1724 src_reg r;
1725 int i;
1726
1727 ir->rhs->accept(this);
1728 r = this->result;
1729
1730 l = get_assignment_lhs(ir->lhs, this);
1731
1732 /* FINISHME: This should really set to the correct maximal writemask for each
1733 * FINISHME: component written (in the loops below). This case can only
1734 * FINISHME: occur for matrices, arrays, and structures.
1735 */
1736 if (ir->write_mask == 0) {
1737 assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
1738 l.writemask = WRITEMASK_XYZW;
1739 } else if (ir->lhs->type->is_scalar()) {
1740 /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
1741 * FINISHME: W component of fragment shader output zero, work correctly.
1742 */
1743 l.writemask = WRITEMASK_XYZW;
1744 } else {
1745 int swizzles[4];
1746 int first_enabled_chan = 0;
1747 int rhs_chan = 0;
1748
1749 assert(ir->lhs->type->is_vector());
1750 l.writemask = ir->write_mask;
1751
1752 for (int i = 0; i < 4; i++) {
1753 if (l.writemask & (1 << i)) {
1754 first_enabled_chan = GET_SWZ(r.swizzle, i);
1755 break;
1756 }
1757 }
1758
1759 /* Swizzle a small RHS vector into the channels being written.
1760 *
1761 * glsl ir treats write_mask as dictating how many channels are
1762 * present on the RHS while Mesa IR treats write_mask as just
1763 * showing which channels of the vec4 RHS get written.
1764 */
1765 for (int i = 0; i < 4; i++) {
1766 if (l.writemask & (1 << i))
1767 swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
1768 else
1769 swizzles[i] = first_enabled_chan;
1770 }
1771 r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
1772 swizzles[2], swizzles[3]);
1773 }
1774
1775 assert(l.file != PROGRAM_UNDEFINED);
1776 assert(r.file != PROGRAM_UNDEFINED);
1777
1778 if (ir->condition) {
1779 const bool switch_order = this->process_move_condition(ir->condition);
1780 src_reg condition = this->result;
1781
1782 for (i = 0; i < type_size(ir->lhs->type); i++) {
1783 if (switch_order) {
1784 emit(ir, OPCODE_CMP, l, condition, src_reg(l), r);
1785 } else {
1786 emit(ir, OPCODE_CMP, l, condition, r, src_reg(l));
1787 }
1788
1789 l.index++;
1790 r.index++;
1791 }
1792 } else {
1793 for (i = 0; i < type_size(ir->lhs->type); i++) {
1794 emit(ir, OPCODE_MOV, l, r);
1795 l.index++;
1796 r.index++;
1797 }
1798 }
1799 }
1800
1801
1802 void
1803 ir_to_mesa_visitor::visit(ir_constant *ir)
1804 {
1805 src_reg src;
1806 GLfloat stack_vals[4] = { 0 };
1807 GLfloat *values = stack_vals;
1808 unsigned int i;
1809
1810 /* Unfortunately, 4 floats is all we can get into
1811 * _mesa_add_unnamed_constant. So, make a temp to store an
1812 * aggregate constant and move each constant value into it. If we
1813 * get lucky, copy propagation will eliminate the extra moves.
1814 */
1815
1816 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1817 src_reg temp_base = get_temp(ir->type);
1818 dst_reg temp = dst_reg(temp_base);
1819
1820 foreach_iter(exec_list_iterator, iter, ir->components) {
1821 ir_constant *field_value = (ir_constant *)iter.get();
1822 int size = type_size(field_value->type);
1823
1824 assert(size > 0);
1825
1826 field_value->accept(this);
1827 src = this->result;
1828
1829 for (i = 0; i < (unsigned int)size; i++) {
1830 emit(ir, OPCODE_MOV, temp, src);
1831
1832 src.index++;
1833 temp.index++;
1834 }
1835 }
1836 this->result = temp_base;
1837 return;
1838 }
1839
1840 if (ir->type->is_array()) {
1841 src_reg temp_base = get_temp(ir->type);
1842 dst_reg temp = dst_reg(temp_base);
1843 int size = type_size(ir->type->fields.array);
1844
1845 assert(size > 0);
1846
1847 for (i = 0; i < ir->type->length; i++) {
1848 ir->array_elements[i]->accept(this);
1849 src = this->result;
1850 for (int j = 0; j < size; j++) {
1851 emit(ir, OPCODE_MOV, temp, src);
1852
1853 src.index++;
1854 temp.index++;
1855 }
1856 }
1857 this->result = temp_base;
1858 return;
1859 }
1860
1861 if (ir->type->is_matrix()) {
1862 src_reg mat = get_temp(ir->type);
1863 dst_reg mat_column = dst_reg(mat);
1864
1865 for (i = 0; i < ir->type->matrix_columns; i++) {
1866 assert(ir->type->base_type == GLSL_TYPE_FLOAT);
1867 values = &ir->value.f[i * ir->type->vector_elements];
1868
1869 src = src_reg(PROGRAM_CONSTANT, -1, NULL);
1870 src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
1871 (gl_constant_value *) values,
1872 ir->type->vector_elements,
1873 &src.swizzle);
1874 emit(ir, OPCODE_MOV, mat_column, src);
1875
1876 mat_column.index++;
1877 }
1878
1879 this->result = mat;
1880 return;
1881 }
1882
1883 src.file = PROGRAM_CONSTANT;
1884 switch (ir->type->base_type) {
1885 case GLSL_TYPE_FLOAT:
1886 values = &ir->value.f[0];
1887 break;
1888 case GLSL_TYPE_UINT:
1889 for (i = 0; i < ir->type->vector_elements; i++) {
1890 values[i] = ir->value.u[i];
1891 }
1892 break;
1893 case GLSL_TYPE_INT:
1894 for (i = 0; i < ir->type->vector_elements; i++) {
1895 values[i] = ir->value.i[i];
1896 }
1897 break;
1898 case GLSL_TYPE_BOOL:
1899 for (i = 0; i < ir->type->vector_elements; i++) {
1900 values[i] = ir->value.b[i];
1901 }
1902 break;
1903 default:
1904 assert(!"Non-float/uint/int/bool constant");
1905 }
1906
1907 this->result = src_reg(PROGRAM_CONSTANT, -1, ir->type);
1908 this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters,
1909 (gl_constant_value *) values,
1910 ir->type->vector_elements,
1911 &this->result.swizzle);
1912 }
1913
1914 function_entry *
1915 ir_to_mesa_visitor::get_function_signature(ir_function_signature *sig)
1916 {
1917 function_entry *entry;
1918
1919 foreach_iter(exec_list_iterator, iter, this->function_signatures) {
1920 entry = (function_entry *)iter.get();
1921
1922 if (entry->sig == sig)
1923 return entry;
1924 }
1925
1926 entry = ralloc(mem_ctx, function_entry);
1927 entry->sig = sig;
1928 entry->sig_id = this->next_signature_id++;
1929 entry->bgn_inst = NULL;
1930
1931 /* Allocate storage for all the parameters. */
1932 foreach_iter(exec_list_iterator, iter, sig->parameters) {
1933 ir_variable *param = (ir_variable *)iter.get();
1934 variable_storage *storage;
1935
1936 storage = find_variable_storage(param);
1937 assert(!storage);
1938
1939 storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY,
1940 this->next_temp);
1941 this->variables.push_tail(storage);
1942
1943 this->next_temp += type_size(param->type);
1944 }
1945
1946 if (!sig->return_type->is_void()) {
1947 entry->return_reg = get_temp(sig->return_type);
1948 } else {
1949 entry->return_reg = undef_src;
1950 }
1951
1952 this->function_signatures.push_tail(entry);
1953 return entry;
1954 }
1955
1956 void
1957 ir_to_mesa_visitor::visit(ir_call *ir)
1958 {
1959 ir_to_mesa_instruction *call_inst;
1960 ir_function_signature *sig = ir->get_callee();
1961 function_entry *entry = get_function_signature(sig);
1962 int i;
1963
1964 /* Process in parameters. */
1965 exec_list_iterator sig_iter = sig->parameters.iterator();
1966 foreach_iter(exec_list_iterator, iter, *ir) {
1967 ir_rvalue *param_rval = (ir_rvalue *)iter.get();
1968 ir_variable *param = (ir_variable *)sig_iter.get();
1969
1970 if (param->mode == ir_var_in ||
1971 param->mode == ir_var_inout) {
1972 variable_storage *storage = find_variable_storage(param);
1973 assert(storage);
1974
1975 param_rval->accept(this);
1976 src_reg r = this->result;
1977
1978 dst_reg l;
1979 l.file = storage->file;
1980 l.index = storage->index;
1981 l.reladdr = NULL;
1982 l.writemask = WRITEMASK_XYZW;
1983 l.cond_mask = COND_TR;
1984
1985 for (i = 0; i < type_size(param->type); i++) {
1986 emit(ir, OPCODE_MOV, l, r);
1987 l.index++;
1988 r.index++;
1989 }
1990 }
1991
1992 sig_iter.next();
1993 }
1994 assert(!sig_iter.has_next());
1995
1996 /* Emit call instruction */
1997 call_inst = emit(ir, OPCODE_CAL);
1998 call_inst->function = entry;
1999
2000 /* Process out parameters. */
2001 sig_iter = sig->parameters.iterator();
2002 foreach_iter(exec_list_iterator, iter, *ir) {
2003 ir_rvalue *param_rval = (ir_rvalue *)iter.get();
2004 ir_variable *param = (ir_variable *)sig_iter.get();
2005
2006 if (param->mode == ir_var_out ||
2007 param->mode == ir_var_inout) {
2008 variable_storage *storage = find_variable_storage(param);
2009 assert(storage);
2010
2011 src_reg r;
2012 r.file = storage->file;
2013 r.index = storage->index;
2014 r.reladdr = NULL;
2015 r.swizzle = SWIZZLE_NOOP;
2016 r.negate = 0;
2017
2018 param_rval->accept(this);
2019 dst_reg l = dst_reg(this->result);
2020
2021 for (i = 0; i < type_size(param->type); i++) {
2022 emit(ir, OPCODE_MOV, l, r);
2023 l.index++;
2024 r.index++;
2025 }
2026 }
2027
2028 sig_iter.next();
2029 }
2030 assert(!sig_iter.has_next());
2031
2032 /* Process return value. */
2033 this->result = entry->return_reg;
2034 }
2035
2036 void
2037 ir_to_mesa_visitor::visit(ir_texture *ir)
2038 {
2039 src_reg result_src, coord, lod_info, projector, dx, dy;
2040 dst_reg result_dst, coord_dst;
2041 ir_to_mesa_instruction *inst = NULL;
2042 prog_opcode opcode = OPCODE_NOP;
2043
2044 ir->coordinate->accept(this);
2045
2046 /* Put our coords in a temp. We'll need to modify them for shadow,
2047 * projection, or LOD, so the only case we'd use it as is is if
2048 * we're doing plain old texturing. Mesa IR optimization should
2049 * handle cleaning up our mess in that case.
2050 */
2051 coord = get_temp(glsl_type::vec4_type);
2052 coord_dst = dst_reg(coord);
2053 emit(ir, OPCODE_MOV, coord_dst, this->result);
2054
2055 if (ir->projector) {
2056 ir->projector->accept(this);
2057 projector = this->result;
2058 }
2059
2060 /* Storage for our result. Ideally for an assignment we'd be using
2061 * the actual storage for the result here, instead.
2062 */
2063 result_src = get_temp(glsl_type::vec4_type);
2064 result_dst = dst_reg(result_src);
2065
2066 switch (ir->op) {
2067 case ir_tex:
2068 opcode = OPCODE_TEX;
2069 break;
2070 case ir_txb:
2071 opcode = OPCODE_TXB;
2072 ir->lod_info.bias->accept(this);
2073 lod_info = this->result;
2074 break;
2075 case ir_txl:
2076 opcode = OPCODE_TXL;
2077 ir->lod_info.lod->accept(this);
2078 lod_info = this->result;
2079 break;
2080 case ir_txd:
2081 opcode = OPCODE_TXD;
2082 ir->lod_info.grad.dPdx->accept(this);
2083 dx = this->result;
2084 ir->lod_info.grad.dPdy->accept(this);
2085 dy = this->result;
2086 break;
2087 case ir_txf:
2088 assert(!"GLSL 1.30 features unsupported");
2089 break;
2090 }
2091
2092 if (ir->projector) {
2093 if (opcode == OPCODE_TEX) {
2094 /* Slot the projector in as the last component of the coord. */
2095 coord_dst.writemask = WRITEMASK_W;
2096 emit(ir, OPCODE_MOV, coord_dst, projector);
2097 coord_dst.writemask = WRITEMASK_XYZW;
2098 opcode = OPCODE_TXP;
2099 } else {
2100 src_reg coord_w = coord;
2101 coord_w.swizzle = SWIZZLE_WWWW;
2102
2103 /* For the other TEX opcodes there's no projective version
2104 * since the last slot is taken up by lod info. Do the
2105 * projective divide now.
2106 */
2107 coord_dst.writemask = WRITEMASK_W;
2108 emit(ir, OPCODE_RCP, coord_dst, projector);
2109
2110 /* In the case where we have to project the coordinates "by hand,"
2111 * the shadow comparitor value must also be projected.
2112 */
2113 src_reg tmp_src = coord;
2114 if (ir->shadow_comparitor) {
2115 /* Slot the shadow value in as the second to last component of the
2116 * coord.
2117 */
2118 ir->shadow_comparitor->accept(this);
2119
2120 tmp_src = get_temp(glsl_type::vec4_type);
2121 dst_reg tmp_dst = dst_reg(tmp_src);
2122
2123 tmp_dst.writemask = WRITEMASK_Z;
2124 emit(ir, OPCODE_MOV, tmp_dst, this->result);
2125
2126 tmp_dst.writemask = WRITEMASK_XY;
2127 emit(ir, OPCODE_MOV, tmp_dst, coord);
2128 }
2129
2130 coord_dst.writemask = WRITEMASK_XYZ;
2131 emit(ir, OPCODE_MUL, coord_dst, tmp_src, coord_w);
2132
2133 coord_dst.writemask = WRITEMASK_XYZW;
2134 coord.swizzle = SWIZZLE_XYZW;
2135 }
2136 }
2137
2138 /* If projection is done and the opcode is not OPCODE_TXP, then the shadow
2139 * comparitor was put in the correct place (and projected) by the code,
2140 * above, that handles by-hand projection.
2141 */
2142 if (ir->shadow_comparitor && (!ir->projector || opcode == OPCODE_TXP)) {
2143 /* Slot the shadow value in as the second to last component of the
2144 * coord.
2145 */
2146 ir->shadow_comparitor->accept(this);
2147 coord_dst.writemask = WRITEMASK_Z;
2148 emit(ir, OPCODE_MOV, coord_dst, this->result);
2149 coord_dst.writemask = WRITEMASK_XYZW;
2150 }
2151
2152 if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) {
2153 /* Mesa IR stores lod or lod bias in the last channel of the coords. */
2154 coord_dst.writemask = WRITEMASK_W;
2155 emit(ir, OPCODE_MOV, coord_dst, lod_info);
2156 coord_dst.writemask = WRITEMASK_XYZW;
2157 }
2158
2159 if (opcode == OPCODE_TXD)
2160 inst = emit(ir, opcode, result_dst, coord, dx, dy);
2161 else
2162 inst = emit(ir, opcode, result_dst, coord);
2163
2164 if (ir->shadow_comparitor)
2165 inst->tex_shadow = GL_TRUE;
2166
2167 inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler,
2168 this->shader_program,
2169 this->prog);
2170
2171 const glsl_type *sampler_type = ir->sampler->type;
2172
2173 switch (sampler_type->sampler_dimensionality) {
2174 case GLSL_SAMPLER_DIM_1D:
2175 inst->tex_target = (sampler_type->sampler_array)
2176 ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
2177 break;
2178 case GLSL_SAMPLER_DIM_2D:
2179 inst->tex_target = (sampler_type->sampler_array)
2180 ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
2181 break;
2182 case GLSL_SAMPLER_DIM_3D:
2183 inst->tex_target = TEXTURE_3D_INDEX;
2184 break;
2185 case GLSL_SAMPLER_DIM_CUBE:
2186 inst->tex_target = TEXTURE_CUBE_INDEX;
2187 break;
2188 case GLSL_SAMPLER_DIM_RECT:
2189 inst->tex_target = TEXTURE_RECT_INDEX;
2190 break;
2191 case GLSL_SAMPLER_DIM_BUF:
2192 assert(!"FINISHME: Implement ARB_texture_buffer_object");
2193 break;
2194 default:
2195 assert(!"Should not get here.");
2196 }
2197
2198 this->result = result_src;
2199 }
2200
2201 void
2202 ir_to_mesa_visitor::visit(ir_return *ir)
2203 {
2204 if (ir->get_value()) {
2205 dst_reg l;
2206 int i;
2207
2208 assert(current_function);
2209
2210 ir->get_value()->accept(this);
2211 src_reg r = this->result;
2212
2213 l = dst_reg(current_function->return_reg);
2214
2215 for (i = 0; i < type_size(current_function->sig->return_type); i++) {
2216 emit(ir, OPCODE_MOV, l, r);
2217 l.index++;
2218 r.index++;
2219 }
2220 }
2221
2222 emit(ir, OPCODE_RET);
2223 }
2224
2225 void
2226 ir_to_mesa_visitor::visit(ir_discard *ir)
2227 {
2228 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
2229
2230 if (ir->condition) {
2231 ir->condition->accept(this);
2232 this->result.negate = ~this->result.negate;
2233 emit(ir, OPCODE_KIL, undef_dst, this->result);
2234 } else {
2235 emit(ir, OPCODE_KIL_NV);
2236 }
2237
2238 fp->UsesKill = GL_TRUE;
2239 }
2240
2241 void
2242 ir_to_mesa_visitor::visit(ir_if *ir)
2243 {
2244 ir_to_mesa_instruction *cond_inst, *if_inst;
2245 ir_to_mesa_instruction *prev_inst;
2246
2247 prev_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
2248
2249 ir->condition->accept(this);
2250 assert(this->result.file != PROGRAM_UNDEFINED);
2251
2252 if (this->options->EmitCondCodes) {
2253 cond_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
2254
2255 /* See if we actually generated any instruction for generating
2256 * the condition. If not, then cook up a move to a temp so we
2257 * have something to set cond_update on.
2258 */
2259 if (cond_inst == prev_inst) {
2260 src_reg temp = get_temp(glsl_type::bool_type);
2261 cond_inst = emit(ir->condition, OPCODE_MOV, dst_reg(temp), result);
2262 }
2263 cond_inst->cond_update = GL_TRUE;
2264
2265 if_inst = emit(ir->condition, OPCODE_IF);
2266 if_inst->dst.cond_mask = COND_NE;
2267 } else {
2268 if_inst = emit(ir->condition, OPCODE_IF, undef_dst, this->result);
2269 }
2270
2271 this->instructions.push_tail(if_inst);
2272
2273 visit_exec_list(&ir->then_instructions, this);
2274
2275 if (!ir->else_instructions.is_empty()) {
2276 emit(ir->condition, OPCODE_ELSE);
2277 visit_exec_list(&ir->else_instructions, this);
2278 }
2279
2280 if_inst = emit(ir->condition, OPCODE_ENDIF);
2281 }
2282
2283 ir_to_mesa_visitor::ir_to_mesa_visitor()
2284 {
2285 result.file = PROGRAM_UNDEFINED;
2286 next_temp = 1;
2287 next_signature_id = 1;
2288 current_function = NULL;
2289 mem_ctx = ralloc_context(NULL);
2290 }
2291
2292 ir_to_mesa_visitor::~ir_to_mesa_visitor()
2293 {
2294 ralloc_free(mem_ctx);
2295 }
2296
2297 static struct prog_src_register
2298 mesa_src_reg_from_ir_src_reg(src_reg reg)
2299 {
2300 struct prog_src_register mesa_reg;
2301
2302 mesa_reg.File = reg.file;
2303 assert(reg.index < (1 << INST_INDEX_BITS));
2304 mesa_reg.Index = reg.index;
2305 mesa_reg.Swizzle = reg.swizzle;
2306 mesa_reg.RelAddr = reg.reladdr != NULL;
2307 mesa_reg.Negate = reg.negate;
2308 mesa_reg.Abs = 0;
2309 mesa_reg.HasIndex2 = GL_FALSE;
2310 mesa_reg.RelAddr2 = 0;
2311 mesa_reg.Index2 = 0;
2312
2313 return mesa_reg;
2314 }
2315
2316 static void
2317 set_branchtargets(ir_to_mesa_visitor *v,
2318 struct prog_instruction *mesa_instructions,
2319 int num_instructions)
2320 {
2321 int if_count = 0, loop_count = 0;
2322 int *if_stack, *loop_stack;
2323 int if_stack_pos = 0, loop_stack_pos = 0;
2324 int i, j;
2325
2326 for (i = 0; i < num_instructions; i++) {
2327 switch (mesa_instructions[i].Opcode) {
2328 case OPCODE_IF:
2329 if_count++;
2330 break;
2331 case OPCODE_BGNLOOP:
2332 loop_count++;
2333 break;
2334 case OPCODE_BRK:
2335 case OPCODE_CONT:
2336 mesa_instructions[i].BranchTarget = -1;
2337 break;
2338 default:
2339 break;
2340 }
2341 }
2342
2343 if_stack = rzalloc_array(v->mem_ctx, int, if_count);
2344 loop_stack = rzalloc_array(v->mem_ctx, int, loop_count);
2345
2346 for (i = 0; i < num_instructions; i++) {
2347 switch (mesa_instructions[i].Opcode) {
2348 case OPCODE_IF:
2349 if_stack[if_stack_pos] = i;
2350 if_stack_pos++;
2351 break;
2352 case OPCODE_ELSE:
2353 mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
2354 if_stack[if_stack_pos - 1] = i;
2355 break;
2356 case OPCODE_ENDIF:
2357 mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
2358 if_stack_pos--;
2359 break;
2360 case OPCODE_BGNLOOP:
2361 loop_stack[loop_stack_pos] = i;
2362 loop_stack_pos++;
2363 break;
2364 case OPCODE_ENDLOOP:
2365 loop_stack_pos--;
2366 /* Rewrite any breaks/conts at this nesting level (haven't
2367 * already had a BranchTarget assigned) to point to the end
2368 * of the loop.
2369 */
2370 for (j = loop_stack[loop_stack_pos]; j < i; j++) {
2371 if (mesa_instructions[j].Opcode == OPCODE_BRK ||
2372 mesa_instructions[j].Opcode == OPCODE_CONT) {
2373 if (mesa_instructions[j].BranchTarget == -1) {
2374 mesa_instructions[j].BranchTarget = i;
2375 }
2376 }
2377 }
2378 /* The loop ends point at each other. */
2379 mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos];
2380 mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i;
2381 break;
2382 case OPCODE_CAL:
2383 foreach_iter(exec_list_iterator, iter, v->function_signatures) {
2384 function_entry *entry = (function_entry *)iter.get();
2385
2386 if (entry->sig_id == mesa_instructions[i].BranchTarget) {
2387 mesa_instructions[i].BranchTarget = entry->inst;
2388 break;
2389 }
2390 }
2391 break;
2392 default:
2393 break;
2394 }
2395 }
2396 }
2397
2398 static void
2399 print_program(struct prog_instruction *mesa_instructions,
2400 ir_instruction **mesa_instruction_annotation,
2401 int num_instructions)
2402 {
2403 ir_instruction *last_ir = NULL;
2404 int i;
2405 int indent = 0;
2406
2407 for (i = 0; i < num_instructions; i++) {
2408 struct prog_instruction *mesa_inst = mesa_instructions + i;
2409 ir_instruction *ir = mesa_instruction_annotation[i];
2410
2411 fprintf(stdout, "%3d: ", i);
2412
2413 if (last_ir != ir && ir) {
2414 int j;
2415
2416 for (j = 0; j < indent; j++) {
2417 fprintf(stdout, " ");
2418 }
2419 ir->print();
2420 printf("\n");
2421 last_ir = ir;
2422
2423 fprintf(stdout, " "); /* line number spacing. */
2424 }
2425
2426 indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent,
2427 PROG_PRINT_DEBUG, NULL);
2428 }
2429 }
2430
2431
2432 /**
2433 * Count resources used by the given gpu program (number of texture
2434 * samplers, etc).
2435 */
2436 static void
2437 count_resources(struct gl_program *prog)
2438 {
2439 unsigned int i;
2440
2441 prog->SamplersUsed = 0;
2442
2443 for (i = 0; i < prog->NumInstructions; i++) {
2444 struct prog_instruction *inst = &prog->Instructions[i];
2445
2446 if (_mesa_is_tex_instruction(inst->Opcode)) {
2447 prog->SamplerTargets[inst->TexSrcUnit] =
2448 (gl_texture_index)inst->TexSrcTarget;
2449 prog->SamplersUsed |= 1 << inst->TexSrcUnit;
2450 if (inst->TexShadow) {
2451 prog->ShadowSamplers |= 1 << inst->TexSrcUnit;
2452 }
2453 }
2454 }
2455
2456 _mesa_update_shader_textures_used(prog);
2457 }
2458
2459
2460 /**
2461 * Check if the given vertex/fragment/shader program is within the
2462 * resource limits of the context (number of texture units, etc).
2463 * If any of those checks fail, record a linker error.
2464 *
2465 * XXX more checks are needed...
2466 */
2467 static void
2468 check_resources(const struct gl_context *ctx,
2469 struct gl_shader_program *shader_program,
2470 struct gl_program *prog)
2471 {
2472 switch (prog->Target) {
2473 case GL_VERTEX_PROGRAM_ARB:
2474 if (_mesa_bitcount(prog->SamplersUsed) >
2475 ctx->Const.MaxVertexTextureImageUnits) {
2476 linker_error(shader_program,
2477 "Too many vertex shader texture samplers");
2478 }
2479 if (prog->Parameters->NumParameters > MAX_UNIFORMS) {
2480 linker_error(shader_program, "Too many vertex shader constants");
2481 }
2482 break;
2483 case MESA_GEOMETRY_PROGRAM:
2484 if (_mesa_bitcount(prog->SamplersUsed) >
2485 ctx->Const.MaxGeometryTextureImageUnits) {
2486 linker_error(shader_program,
2487 "Too many geometry shader texture samplers");
2488 }
2489 if (prog->Parameters->NumParameters >
2490 MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) {
2491 linker_error(shader_program, "Too many geometry shader constants");
2492 }
2493 break;
2494 case GL_FRAGMENT_PROGRAM_ARB:
2495 if (_mesa_bitcount(prog->SamplersUsed) >
2496 ctx->Const.MaxTextureImageUnits) {
2497 linker_error(shader_program,
2498 "Too many fragment shader texture samplers");
2499 }
2500 if (prog->Parameters->NumParameters > MAX_UNIFORMS) {
2501 linker_error(shader_program, "Too many fragment shader constants");
2502 }
2503 break;
2504 default:
2505 _mesa_problem(ctx, "unexpected program type in check_resources()");
2506 }
2507 }
2508
2509
2510
/* Pairs a uniform with the parameter-list position chosen for it in the
 * target being translated, so the uniforms can be ordered by position.
 */
struct uniform_sort {
   struct gl_uniform *u;
   int pos;
};

/* The shader_program->Uniforms list is almost sorted in increasing
 * uniform->{Frag,Vert}Pos locations, but not quite when there are
 * uniforms shared between targets.  We need to add parameters in
 * increasing order for the targets.
 *
 * qsort() comparator over struct uniform_sort: returns a negative value,
 * zero or a positive value when a's position is below, equal to or above
 * b's.  The positions are compared rather than subtracted, so the result
 * is correct for any pair of ints (a subtraction can overflow).
 */
static int
sort_uniforms(const void *a, const void *b)
{
   const struct uniform_sort *u1 = (const struct uniform_sort *)a;
   const struct uniform_sort *u2 = (const struct uniform_sort *)b;

   return (u1->pos > u2->pos) - (u1->pos < u2->pos);
}
2529
2530 /* Add the uniforms to the parameters. The linker chose locations
2531 * in our parameters lists (which weren't created yet), which the
2532 * uniforms code will use to poke values into our parameters list
2533 * when uniforms are updated.
2534 */
2535 static void
2536 add_uniforms_to_parameters_list(struct gl_shader_program *shader_program,
2537 struct gl_shader *shader,
2538 struct gl_program *prog)
2539 {
2540 unsigned int i;
2541 unsigned int next_sampler = 0, num_uniforms = 0;
2542 struct uniform_sort *sorted_uniforms;
2543
2544 sorted_uniforms = ralloc_array(NULL, struct uniform_sort,
2545 shader_program->Uniforms->NumUniforms);
2546
2547 for (i = 0; i < shader_program->Uniforms->NumUniforms; i++) {
2548 struct gl_uniform *uniform = shader_program->Uniforms->Uniforms + i;
2549 int parameter_index = -1;
2550
2551 switch (shader->Type) {
2552 case GL_VERTEX_SHADER:
2553 parameter_index = uniform->VertPos;
2554 break;
2555 case GL_FRAGMENT_SHADER:
2556 parameter_index = uniform->FragPos;
2557 break;
2558 case GL_GEOMETRY_SHADER:
2559 parameter_index = uniform->GeomPos;
2560 break;
2561 }
2562
2563 /* Only add uniforms used in our target. */
2564 if (parameter_index != -1) {
2565 sorted_uniforms[num_uniforms].pos = parameter_index;
2566 sorted_uniforms[num_uniforms].u = uniform;
2567 num_uniforms++;
2568 }
2569 }
2570
2571 qsort(sorted_uniforms, num_uniforms, sizeof(struct uniform_sort),
2572 sort_uniforms);
2573
2574 for (i = 0; i < num_uniforms; i++) {
2575 struct gl_uniform *uniform = sorted_uniforms[i].u;
2576 int parameter_index = sorted_uniforms[i].pos;
2577 const glsl_type *type = uniform->Type;
2578 unsigned int size;
2579
2580 if (type->is_vector() ||
2581 type->is_scalar()) {
2582 size = type->vector_elements;
2583 } else {
2584 size = type_size(type) * 4;
2585 }
2586
2587 gl_register_file file;
2588 if (type->is_sampler() ||
2589 (type->is_array() && type->fields.array->is_sampler())) {
2590 file = PROGRAM_SAMPLER;
2591 } else {
2592 file = PROGRAM_UNIFORM;
2593 }
2594
2595 GLint index = _mesa_lookup_parameter_index(prog->Parameters, -1,
2596 uniform->Name);
2597
2598 if (index < 0) {
2599 index = _mesa_add_parameter(prog->Parameters, file,
2600 uniform->Name, size, type->gl_type,
2601 NULL, NULL, 0x0);
2602
2603 /* Sampler uniform values are stored in prog->SamplerUnits,
2604 * and the entry in that array is selected by this index we
2605 * store in ParameterValues[].
2606 */
2607 if (file == PROGRAM_SAMPLER) {
2608 for (unsigned int j = 0; j < size / 4; j++)
2609 prog->Parameters->ParameterValues[index + j][0].f = next_sampler++;
2610 }
2611
2612 /* The location chosen in the Parameters list here (returned
2613 * from _mesa_add_uniform) has to match what the linker chose.
2614 */
2615 if (index != parameter_index) {
2616 linker_error(shader_program,
2617 "Allocation of uniform `%s' to target failed "
2618 "(%d vs %d)\n",
2619 uniform->Name, index, parameter_index);
2620 }
2621 }
2622 }
2623
2624 ralloc_free(sorted_uniforms);
2625 }
2626
2627 static void
2628 set_uniform_initializer(struct gl_context *ctx, void *mem_ctx,
2629 struct gl_shader_program *shader_program,
2630 const char *name, const glsl_type *type,
2631 ir_constant *val)
2632 {
2633 if (type->is_record()) {
2634 ir_constant *field_constant;
2635
2636 field_constant = (ir_constant *)val->components.get_head();
2637
2638 for (unsigned int i = 0; i < type->length; i++) {
2639 const glsl_type *field_type = type->fields.structure[i].type;
2640 const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name,
2641 type->fields.structure[i].name);
2642 set_uniform_initializer(ctx, mem_ctx, shader_program, field_name,
2643 field_type, field_constant);
2644 field_constant = (ir_constant *)field_constant->next;
2645 }
2646 return;
2647 }
2648
2649 int loc = _mesa_get_uniform_location(ctx, shader_program, name);
2650
2651 if (loc == -1) {
2652 linker_error(shader_program,
2653 "Couldn't find uniform for initializer %s\n", name);
2654 return;
2655 }
2656
2657 for (unsigned int i = 0; i < (type->is_array() ? type->length : 1); i++) {
2658 ir_constant *element;
2659 const glsl_type *element_type;
2660 if (type->is_array()) {
2661 element = val->array_elements[i];
2662 element_type = type->fields.array;
2663 } else {
2664 element = val;
2665 element_type = type;
2666 }
2667
2668 void *values;
2669
2670 if (element_type->base_type == GLSL_TYPE_BOOL) {
2671 int *conv = ralloc_array(mem_ctx, int, element_type->components());
2672 for (unsigned int j = 0; j < element_type->components(); j++) {
2673 conv[j] = element->value.b[j];
2674 }
2675 values = (void *)conv;
2676 element_type = glsl_type::get_instance(GLSL_TYPE_INT,
2677 element_type->vector_elements,
2678 1);
2679 } else {
2680 values = &element->value;
2681 }
2682
2683 if (element_type->is_matrix()) {
2684 _mesa_uniform_matrix(ctx, shader_program,
2685 element_type->matrix_columns,
2686 element_type->vector_elements,
2687 loc, 1, GL_FALSE, (GLfloat *)values);
2688 loc += element_type->matrix_columns;
2689 } else {
2690 _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns,
2691 values, element_type->gl_type);
2692 loc += type_size(element_type);
2693 }
2694 }
2695 }
2696
2697 static void
2698 set_uniform_initializers(struct gl_context *ctx,
2699 struct gl_shader_program *shader_program)
2700 {
2701 void *mem_ctx = NULL;
2702
2703 for (unsigned int i = 0; i < MESA_SHADER_TYPES; i++) {
2704 struct gl_shader *shader = shader_program->_LinkedShaders[i];
2705
2706 if (shader == NULL)
2707 continue;
2708
2709 foreach_iter(exec_list_iterator, iter, *shader->ir) {
2710 ir_instruction *ir = (ir_instruction *)iter.get();
2711 ir_variable *var = ir->as_variable();
2712
2713 if (!var || var->mode != ir_var_uniform || !var->constant_value)
2714 continue;
2715
2716 if (!mem_ctx)
2717 mem_ctx = ralloc_context(NULL);
2718
2719 set_uniform_initializer(ctx, mem_ctx, shader_program, var->name,
2720 var->type, var->constant_value);
2721 }
2722 }
2723
2724 ralloc_free(mem_ctx);
2725 }
2726
2727 /*
2728 * On a basic block basis, tracks available PROGRAM_TEMPORARY register
2729 * channels for copy propagation and updates following instructions to
2730 * use the original versions.
2731 *
2732 * The ir_to_mesa_visitor lazily produces code assuming that this pass
2733 * will occur. As an example, a TXP production before this pass:
2734 *
2735 * 0: MOV TEMP[1], INPUT[4].xyyy;
2736 * 1: MOV TEMP[1].w, INPUT[4].wwww;
2737 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
2738 *
2739 * and after:
2740 *
2741 * 0: MOV TEMP[1], INPUT[4].xyyy;
2742 * 1: MOV TEMP[1].w, INPUT[4].wwww;
2743 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
2744 *
2745 * which allows for dead code elimination on TEMP[1]'s writes.
2746 */
2747 void
2748 ir_to_mesa_visitor::copy_propagate(void)
2749 {
2750 ir_to_mesa_instruction **acp = rzalloc_array(mem_ctx,
2751 ir_to_mesa_instruction *,
2752 this->next_temp * 4);
2753 int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
2754 int level = 0;
2755
2756 foreach_iter(exec_list_iterator, iter, this->instructions) {
2757 ir_to_mesa_instruction *inst = (ir_to_mesa_instruction *)iter.get();
2758
2759 assert(inst->dst.file != PROGRAM_TEMPORARY
2760 || inst->dst.index < this->next_temp);
2761
2762 /* First, do any copy propagation possible into the src regs. */
2763 for (int r = 0; r < 3; r++) {
2764 ir_to_mesa_instruction *first = NULL;
2765 bool good = true;
2766 int acp_base = inst->src[r].index * 4;
2767
2768 if (inst->src[r].file != PROGRAM_TEMPORARY ||
2769 inst->src[r].reladdr)
2770 continue;
2771
2772 /* See if we can find entries in the ACP consisting of MOVs
2773 * from the same src register for all the swizzled channels
2774 * of this src register reference.
2775 */
2776 for (int i = 0; i < 4; i++) {
2777 int src_chan = GET_SWZ(inst->src[r].swizzle, i);
2778 ir_to_mesa_instruction *copy_chan = acp[acp_base + src_chan];
2779
2780 if (!copy_chan) {
2781 good = false;
2782 break;
2783 }
2784
2785 assert(acp_level[acp_base + src_chan] <= level);
2786
2787 if (!first) {
2788 first = copy_chan;
2789 } else {
2790 if (first->src[0].file != copy_chan->src[0].file ||
2791 first->src[0].index != copy_chan->src[0].index) {
2792 good = false;
2793 break;
2794 }
2795 }
2796 }
2797
2798 if (good) {
2799 /* We've now validated that we can copy-propagate to
2800 * replace this src register reference. Do it.
2801 */
2802 inst->src[r].file = first->src[0].file;
2803 inst->src[r].index = first->src[0].index;
2804
2805 int swizzle = 0;
2806 for (int i = 0; i < 4; i++) {
2807 int src_chan = GET_SWZ(inst->src[r].swizzle, i);
2808 ir_to_mesa_instruction *copy_inst = acp[acp_base + src_chan];
2809 swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
2810 (3 * i));
2811 }
2812 inst->src[r].swizzle = swizzle;
2813 }
2814 }
2815
2816 switch (inst->op) {
2817 case OPCODE_BGNLOOP:
2818 case OPCODE_ENDLOOP:
2819 /* End of a basic block, clear the ACP entirely. */
2820 memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
2821 break;
2822
2823 case OPCODE_IF:
2824 ++level;
2825 break;
2826
2827 case OPCODE_ENDIF:
2828 case OPCODE_ELSE:
2829 /* Clear all channels written inside the block from the ACP, but
2830 * leaving those that were not touched.
2831 */
2832 for (int r = 0; r < this->next_temp; r++) {
2833 for (int c = 0; c < 4; c++) {
2834 if (!acp[4 * r + c])
2835 continue;
2836
2837 if (acp_level[4 * r + c] >= level)
2838 acp[4 * r + c] = NULL;
2839 }
2840 }
2841 if (inst->op == OPCODE_ENDIF)
2842 --level;
2843 break;
2844
2845 default:
2846 /* Continuing the block, clear any written channels from
2847 * the ACP.
2848 */
2849 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
2850 /* Any temporary might be written, so no copy propagation
2851 * across this instruction.
2852 */
2853 memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
2854 } else if (inst->dst.file == PROGRAM_OUTPUT &&
2855 inst->dst.reladdr) {
2856 /* Any output might be written, so no copy propagation
2857 * from outputs across this instruction.
2858 */
2859 for (int r = 0; r < this->next_temp; r++) {
2860 for (int c = 0; c < 4; c++) {
2861 if (!acp[4 * r + c])
2862 continue;
2863
2864 if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
2865 acp[4 * r + c] = NULL;
2866 }
2867 }
2868 } else if (inst->dst.file == PROGRAM_TEMPORARY ||
2869 inst->dst.file == PROGRAM_OUTPUT) {
2870 /* Clear where it's used as dst. */
2871 if (inst->dst.file == PROGRAM_TEMPORARY) {
2872 for (int c = 0; c < 4; c++) {
2873 if (inst->dst.writemask & (1 << c)) {
2874 acp[4 * inst->dst.index + c] = NULL;
2875 }
2876 }
2877 }
2878
2879 /* Clear where it's used as src. */
2880 for (int r = 0; r < this->next_temp; r++) {
2881 for (int c = 0; c < 4; c++) {
2882 if (!acp[4 * r + c])
2883 continue;
2884
2885 int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);
2886
2887 if (acp[4 * r + c]->src[0].file == inst->dst.file &&
2888 acp[4 * r + c]->src[0].index == inst->dst.index &&
2889 inst->dst.writemask & (1 << src_chan))
2890 {
2891 acp[4 * r + c] = NULL;
2892 }
2893 }
2894 }
2895 }
2896 break;
2897 }
2898
2899 /* If this is a copy, add it to the ACP. */
2900 if (inst->op == OPCODE_MOV &&
2901 inst->dst.file == PROGRAM_TEMPORARY &&
2902 !inst->dst.reladdr &&
2903 !inst->saturate &&
2904 !inst->src[0].reladdr &&
2905 !inst->src[0].negate) {
2906 for (int i = 0; i < 4; i++) {
2907 if (inst->dst.writemask & (1 << i)) {
2908 acp[4 * inst->dst.index + i] = inst;
2909 acp_level[4 * inst->dst.index + i] = level;
2910 }
2911 }
2912 }
2913 }
2914
2915 ralloc_free(acp_level);
2916 ralloc_free(acp);
2917 }
2918
2919
2920 /**
2921 * Convert a shader's GLSL IR into a Mesa gl_program.
2922 */
2923 static struct gl_program *
2924 get_mesa_program(struct gl_context *ctx,
2925 struct gl_shader_program *shader_program,
2926 struct gl_shader *shader)
2927 {
2928 ir_to_mesa_visitor v;
2929 struct prog_instruction *mesa_instructions, *mesa_inst;
2930 ir_instruction **mesa_instruction_annotation;
2931 int i;
2932 struct gl_program *prog;
2933 GLenum target;
2934 const char *target_string;
2935 GLboolean progress;
2936 struct gl_shader_compiler_options *options =
2937 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)];
2938
2939 switch (shader->Type) {
2940 case GL_VERTEX_SHADER:
2941 target = GL_VERTEX_PROGRAM_ARB;
2942 target_string = "vertex";
2943 break;
2944 case GL_FRAGMENT_SHADER:
2945 target = GL_FRAGMENT_PROGRAM_ARB;
2946 target_string = "fragment";
2947 break;
2948 case GL_GEOMETRY_SHADER:
2949 target = GL_GEOMETRY_PROGRAM_NV;
2950 target_string = "geometry";
2951 break;
2952 default:
2953 assert(!"should not be reached");
2954 return NULL;
2955 }
2956
2957 validate_ir_tree(shader->ir);
2958
2959 prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
2960 if (!prog)
2961 return NULL;
2962 prog->Parameters = _mesa_new_parameter_list();
2963 prog->Varying = _mesa_new_parameter_list();
2964 prog->Attributes = _mesa_new_parameter_list();
2965 v.ctx = ctx;
2966 v.prog = prog;
2967 v.shader_program = shader_program;
2968 v.options = options;
2969
2970 add_uniforms_to_parameters_list(shader_program, shader, prog);
2971
2972 /* Emit Mesa IR for main(). */
2973 visit_exec_list(shader->ir, &v);
2974 v.emit(NULL, OPCODE_END);
2975
2976 /* Now emit bodies for any functions that were used. */
2977 do {
2978 progress = GL_FALSE;
2979
2980 foreach_iter(exec_list_iterator, iter, v.function_signatures) {
2981 function_entry *entry = (function_entry *)iter.get();
2982
2983 if (!entry->bgn_inst) {
2984 v.current_function = entry;
2985
2986 entry->bgn_inst = v.emit(NULL, OPCODE_BGNSUB);
2987 entry->bgn_inst->function = entry;
2988
2989 visit_exec_list(&entry->sig->body, &v);
2990
2991 ir_to_mesa_instruction *last;
2992 last = (ir_to_mesa_instruction *)v.instructions.get_tail();
2993 if (last->op != OPCODE_RET)
2994 v.emit(NULL, OPCODE_RET);
2995
2996 ir_to_mesa_instruction *end;
2997 end = v.emit(NULL, OPCODE_ENDSUB);
2998 end->function = entry;
2999
3000 progress = GL_TRUE;
3001 }
3002 }
3003 } while (progress);
3004
3005 prog->NumTemporaries = v.next_temp;
3006
3007 int num_instructions = 0;
3008 foreach_iter(exec_list_iterator, iter, v.instructions) {
3009 num_instructions++;
3010 }
3011
3012 mesa_instructions =
3013 (struct prog_instruction *)calloc(num_instructions,
3014 sizeof(*mesa_instructions));
3015 mesa_instruction_annotation = ralloc_array(v.mem_ctx, ir_instruction *,
3016 num_instructions);
3017
3018 v.copy_propagate();
3019
3020 /* Convert ir_mesa_instructions into prog_instructions.
3021 */
3022 mesa_inst = mesa_instructions;
3023 i = 0;
3024 foreach_iter(exec_list_iterator, iter, v.instructions) {
3025 const ir_to_mesa_instruction *inst = (ir_to_mesa_instruction *)iter.get();
3026
3027 mesa_inst->Opcode = inst->op;
3028 mesa_inst->CondUpdate = inst->cond_update;
3029 if (inst->saturate)
3030 mesa_inst->SaturateMode = SATURATE_ZERO_ONE;
3031 mesa_inst->DstReg.File = inst->dst.file;
3032 mesa_inst->DstReg.Index = inst->dst.index;
3033 mesa_inst->DstReg.CondMask = inst->dst.cond_mask;
3034 mesa_inst->DstReg.WriteMask = inst->dst.writemask;
3035 mesa_inst->DstReg.RelAddr = inst->dst.reladdr != NULL;
3036 mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src[0]);
3037 mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src[1]);
3038 mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src[2]);
3039 mesa_inst->TexSrcUnit = inst->sampler;
3040 mesa_inst->TexSrcTarget = inst->tex_target;
3041 mesa_inst->TexShadow = inst->tex_shadow;
3042 mesa_instruction_annotation[i] = inst->ir;
3043
3044 /* Set IndirectRegisterFiles. */
3045 if (mesa_inst->DstReg.RelAddr)
3046 prog->IndirectRegisterFiles |= 1 << mesa_inst->DstReg.File;
3047
3048 /* Update program's bitmask of indirectly accessed register files */
3049 for (unsigned src = 0; src < 3; src++)
3050 if (mesa_inst->SrcReg[src].RelAddr)
3051 prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File;
3052
3053 switch (mesa_inst->Opcode) {
3054 case OPCODE_IF:
3055 if (options->EmitNoIfs) {
3056 linker_warning(shader_program,
3057 "Couldn't flatten if-statement. "
3058 "This will likely result in software "
3059 "rasterization.\n");
3060 }
3061 break;
3062 case OPCODE_BGNLOOP:
3063 if (options->EmitNoLoops) {
3064 linker_warning(shader_program,
3065 "Couldn't unroll loop. "
3066 "This will likely result in software "
3067 "rasterization.\n");
3068 }
3069 break;
3070 case OPCODE_CONT:
3071 if (options->EmitNoCont) {
3072 linker_warning(shader_program,
3073 "Couldn't lower continue-statement. "
3074 "This will likely result in software "
3075 "rasterization.\n");
3076 }
3077 break;
3078 case OPCODE_BGNSUB:
3079 inst->function->inst = i;
3080 mesa_inst->Comment = strdup(inst->function->sig->function_name());
3081 break;
3082 case OPCODE_ENDSUB:
3083 mesa_inst->Comment = strdup(inst->function->sig->function_name());
3084 break;
3085 case OPCODE_CAL:
3086 mesa_inst->BranchTarget = inst->function->sig_id; /* rewritten later */
3087 break;
3088 case OPCODE_ARL:
3089 prog->NumAddressRegs = 1;
3090 break;
3091 default:
3092 break;
3093 }
3094
3095 mesa_inst++;
3096 i++;
3097
3098 if (!shader_program->LinkStatus)
3099 break;
3100 }
3101
3102 if (!shader_program->LinkStatus) {
3103 free(mesa_instructions);
3104 _mesa_reference_program(ctx, &shader->Program, NULL);
3105 return NULL;
3106 }
3107
3108 set_branchtargets(&v, mesa_instructions, num_instructions);
3109
3110 if (ctx->Shader.Flags & GLSL_DUMP) {
3111 printf("\n");
3112 printf("GLSL IR for linked %s program %d:\n", target_string,
3113 shader_program->Name);
3114 _mesa_print_ir(shader->ir, NULL);
3115 printf("\n");
3116 printf("\n");
3117 printf("Mesa IR for linked %s program %d:\n", target_string,
3118 shader_program->Name);
3119 print_program(mesa_instructions, mesa_instruction_annotation,
3120 num_instructions);
3121 }
3122
3123 prog->Instructions = mesa_instructions;
3124 prog->NumInstructions = num_instructions;
3125
3126 do_set_program_inouts(shader->ir, prog);
3127 count_resources(prog);
3128
3129 check_resources(ctx, shader_program, prog);
3130
3131 _mesa_reference_program(ctx, &shader->Program, prog);
3132
3133 if ((ctx->Shader.Flags & GLSL_NO_OPT) == 0) {
3134 _mesa_optimize_program(ctx, prog);
3135 }
3136
3137 return prog;
3138 }
3139
3140 extern "C" {
3141
3142 /**
3143 * Link a shader.
3144 * Called via ctx->Driver.LinkShader()
3145 * This actually involves converting GLSL IR into Mesa gl_programs with
3146 * code lowering and other optimizations.
3147 */
3148 GLboolean
3149 _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
3150 {
3151 assert(prog->LinkStatus);
3152
3153 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
3154 if (prog->_LinkedShaders[i] == NULL)
3155 continue;
3156
3157 bool progress;
3158 exec_list *ir = prog->_LinkedShaders[i]->ir;
3159 const struct gl_shader_compiler_options *options =
3160 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)];
3161
3162 do {
3163 progress = false;
3164
3165 /* Lowering */
3166 do_mat_op_to_vec(ir);
3167 lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
3168 | LOG_TO_LOG2
3169 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
3170
3171 progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
3172
3173 progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress;
3174
3175 progress = lower_quadop_vector(ir, true) || progress;
3176
3177 if (options->EmitNoIfs) {
3178 progress = lower_discard(ir) || progress;
3179 progress = lower_if_to_cond_assign(ir) || progress;
3180 }
3181
3182 if (options->EmitNoNoise)
3183 progress = lower_noise(ir) || progress;
3184
3185 /* If there are forms of indirect addressing that the driver
3186 * cannot handle, perform the lowering pass.
3187 */
3188 if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput
3189 || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform)
3190 progress =
3191 lower_variable_index_to_cond_assign(ir,
3192 options->EmitNoIndirectInput,
3193 options->EmitNoIndirectOutput,
3194 options->EmitNoIndirectTemp,
3195 options->EmitNoIndirectUniform)
3196 || progress;
3197
3198 progress = do_vec_index_to_cond_assign(ir) || progress;
3199 } while (progress);
3200
3201 validate_ir_tree(ir);
3202 }
3203
3204 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
3205 struct gl_program *linked_prog;
3206
3207 if (prog->_LinkedShaders[i] == NULL)
3208 continue;
3209
3210 linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
3211
3212 if (linked_prog) {
3213 bool ok = true;
3214
3215 switch (prog->_LinkedShaders[i]->Type) {
3216 case GL_VERTEX_SHADER:
3217 _mesa_reference_vertprog(ctx, &prog->VertexProgram,
3218 (struct gl_vertex_program *)linked_prog);
3219 ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB,
3220 linked_prog);
3221 break;
3222 case GL_FRAGMENT_SHADER:
3223 _mesa_reference_fragprog(ctx, &prog->FragmentProgram,
3224 (struct gl_fragment_program *)linked_prog);
3225 ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB,
3226 linked_prog);
3227 break;
3228 case GL_GEOMETRY_SHADER:
3229 _mesa_reference_geomprog(ctx, &prog->GeometryProgram,
3230 (struct gl_geometry_program *)linked_prog);
3231 ok = ctx->Driver.ProgramStringNotify(ctx, GL_GEOMETRY_PROGRAM_NV,
3232 linked_prog);
3233 break;
3234 }
3235 if (!ok) {
3236 return GL_FALSE;
3237 }
3238 }
3239
3240 _mesa_reference_program(ctx, &linked_prog, NULL);
3241 }
3242
3243 return GL_TRUE;
3244 }
3245
3246
3247 /**
3248 * Compile a GLSL shader. Called via glCompileShader().
3249 */
3250 void
3251 _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader)
3252 {
3253 struct _mesa_glsl_parse_state *state =
3254 new(shader) _mesa_glsl_parse_state(ctx, shader->Type, shader);
3255
3256 const char *source = shader->Source;
3257 /* Check if the user called glCompileShader without first calling
3258 * glShaderSource. This should fail to compile, but not raise a GL_ERROR.
3259 */
3260 if (source == NULL) {
3261 shader->CompileStatus = GL_FALSE;
3262 return;
3263 }
3264
3265 state->error = preprocess(state, &source, &state->info_log,
3266 &ctx->Extensions, ctx->API);
3267
3268 if (ctx->Shader.Flags & GLSL_DUMP) {
3269 printf("GLSL source for %s shader %d:\n",
3270 _mesa_glsl_shader_target_name(state->target), shader->Name);
3271 printf("%s\n", shader->Source);
3272 }
3273
3274 if (!state->error) {
3275 _mesa_glsl_lexer_ctor(state, source);
3276 _mesa_glsl_parse(state);
3277 _mesa_glsl_lexer_dtor(state);
3278 }
3279
3280 ralloc_free(shader->ir);
3281 shader->ir = new(shader) exec_list;
3282 if (!state->error && !state->translation_unit.is_empty())
3283 _mesa_ast_to_hir(shader->ir, state);
3284
3285 if (!state->error && !shader->ir->is_empty()) {
3286 validate_ir_tree(shader->ir);
3287
3288 /* Do some optimization at compile time to reduce shader IR size
3289 * and reduce later work if the same shader is linked multiple times
3290 */
3291 while (do_common_optimization(shader->ir, false, 32))
3292 ;
3293
3294 validate_ir_tree(shader->ir);
3295 }
3296
3297 shader->symbols = state->symbols;
3298
3299 shader->CompileStatus = !state->error;
3300 shader->InfoLog = state->info_log;
3301 shader->Version = state->language_version;
3302 memcpy(shader->builtins_to_link, state->builtins_to_link,
3303 sizeof(shader->builtins_to_link[0]) * state->num_builtins_to_link);
3304 shader->num_builtins_to_link = state->num_builtins_to_link;
3305
3306 if (ctx->Shader.Flags & GLSL_LOG) {
3307 _mesa_write_shader_to_file(shader);
3308 }
3309
3310 if (ctx->Shader.Flags & GLSL_DUMP) {
3311 if (shader->CompileStatus) {
3312 printf("GLSL IR for shader %d:\n", shader->Name);
3313 _mesa_print_ir(shader->ir, NULL);
3314 printf("\n\n");
3315 } else {
3316 printf("GLSL shader %d failed to compile.\n", shader->Name);
3317 }
3318 if (shader->InfoLog && shader->InfoLog[0] != 0) {
3319 printf("GLSL shader %d info log:\n", shader->Name);
3320 printf("%s\n", shader->InfoLog);
3321 }
3322 }
3323
3324 /* Retain any live IR, but trash the rest. */
3325 reparent_ir(shader->ir, shader->ir);
3326
3327 ralloc_free(state);
3328 }
3329
3330
3331 /**
3332 * Link a GLSL shader program. Called via glLinkProgram().
3333 */
3334 void
3335 _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
3336 {
3337 unsigned int i;
3338
3339 _mesa_clear_shader_program_data(ctx, prog);
3340
3341 prog->LinkStatus = GL_TRUE;
3342
3343 for (i = 0; i < prog->NumShaders; i++) {
3344 if (!prog->Shaders[i]->CompileStatus) {
3345 linker_error(prog, "linking with uncompiled shader");
3346 prog->LinkStatus = GL_FALSE;
3347 }
3348 }
3349
3350 prog->Varying = _mesa_new_parameter_list();
3351 _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL);
3352 _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL);
3353 _mesa_reference_geomprog(ctx, &prog->GeometryProgram, NULL);
3354
3355 if (prog->LinkStatus) {
3356 link_shaders(ctx, prog);
3357 }
3358
3359 if (prog->LinkStatus) {
3360 if (!ctx->Driver.LinkShader(ctx, prog)) {
3361 prog->LinkStatus = GL_FALSE;
3362 }
3363 }
3364
3365 set_uniform_initializers(ctx, prog);
3366
3367 if (ctx->Shader.Flags & GLSL_DUMP) {
3368 if (!prog->LinkStatus) {
3369 printf("GLSL shader program %d failed to link\n", prog->Name);
3370 }
3371
3372 if (prog->InfoLog && prog->InfoLog[0] != 0) {
3373 printf("GLSL shader program %d info log:\n", prog->Name);
3374 printf("%s\n", prog->InfoLog);
3375 }
3376 }
3377 }
3378
3379 } /* extern "C" */