5ea03b4424e86d6d0a103f569b9a0bbde185a15c
[mesa.git] / src / mesa / state_tracker / st_glsl_to_tgsi.cpp
1 /*
2 * Copyright (C) 2005-2007 Brian Paul All Rights Reserved.
3 * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
4 * Copyright © 2010 Intel Corporation
5 * Copyright © 2011 Bryan Cain
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 * DEALINGS IN THE SOFTWARE.
25 */
26
27 /**
28 * \file st_glsl_to_tgsi.cpp
29 *
30 * Translate GLSL IR to Mesa's gl_program representation and to TGSI.
31 */
32
33 #include <stdio.h>
34 #include "main/compiler.h"
35 #include "ir.h"
36 #include "ir_visitor.h"
37 #include "ir_print_visitor.h"
38 #include "ir_expression_flattening.h"
39 #include "glsl_types.h"
40 #include "glsl_parser_extras.h"
41 #include "../glsl/program.h"
42 #include "ir_optimization.h"
43 #include "ast.h"
44
45 extern "C" {
46 #include "main/mtypes.h"
47 #include "main/shaderapi.h"
48 #include "main/shaderobj.h"
49 #include "main/uniforms.h"
50 #include "program/hash_table.h"
51 #include "program/prog_instruction.h"
52 #include "program/prog_optimize.h"
53 #include "program/prog_print.h"
54 #include "program/program.h"
55 #include "program/prog_uniform.h"
56 #include "program/prog_parameter.h"
57 #include "program/sampler.h"
58
59 #include "pipe/p_compiler.h"
60 #include "pipe/p_context.h"
61 #include "pipe/p_screen.h"
62 #include "pipe/p_shader_tokens.h"
63 #include "pipe/p_state.h"
64 #include "util/u_math.h"
65 #include "tgsi/tgsi_ureg.h"
66 #include "tgsi/tgsi_dump.h"
67 #include "st_context.h"
68 #include "st_program.h"
69 #include "st_glsl_to_tgsi.h"
70 #include "st_mesa_to_tgsi.h"
71
/* One bit per register file, covering the same set of files that
 * glsl_to_tgsi_visitor::emit() counts as "constants" when it records
 * indirect addressing.
 */
#define PROGRAM_ANY_CONST            \
   ((1 << PROGRAM_LOCAL_PARAM) |     \
    (1 << PROGRAM_ENV_PARAM)   |     \
    (1 << PROGRAM_STATE_VAR)   |     \
    (1 << PROGRAM_NAMED_PARAM) |     \
    (1 << PROGRAM_CONSTANT)    |     \
    (1 << PROGRAM_UNIFORM))
78 }
79
80 class st_src_reg;
81 class st_dst_reg;
82
83 static int swizzle_for_size(int size);
84
85 /**
86 * This struct is a corresponding struct to Mesa prog_src_register, with
87 * wider fields.
88 */
89 class st_src_reg {
90 public:
91 st_src_reg(gl_register_file file, int index, const glsl_type *type)
92 {
93 this->file = file;
94 this->index = index;
95 if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
96 this->swizzle = swizzle_for_size(type->vector_elements);
97 else
98 this->swizzle = SWIZZLE_XYZW;
99 this->negate = 0;
100 this->reladdr = NULL;
101 }
102
103 st_src_reg(gl_register_file file, int index)
104 {
105 this->file = file;
106 this->index = index;
107 this->swizzle = SWIZZLE_XYZW;
108 this->negate = 0;
109 this->reladdr = NULL;
110 }
111
112 st_src_reg()
113 {
114 this->file = PROGRAM_UNDEFINED;
115 this->index = 0;
116 this->swizzle = 0;
117 this->negate = 0;
118 this->reladdr = NULL;
119 }
120
121 explicit st_src_reg(st_dst_reg reg);
122
123 gl_register_file file; /**< PROGRAM_* from Mesa */
124 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
125 GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
126 int negate; /**< NEGATE_XYZW mask from mesa */
127 /** Register index should be offset by the integer in this reg. */
128 st_src_reg *reladdr;
129 };
130
131 class st_dst_reg {
132 public:
133 st_dst_reg(gl_register_file file, int writemask)
134 {
135 this->file = file;
136 this->index = 0;
137 this->writemask = writemask;
138 this->cond_mask = COND_TR;
139 this->reladdr = NULL;
140 }
141
142 st_dst_reg()
143 {
144 this->file = PROGRAM_UNDEFINED;
145 this->index = 0;
146 this->writemask = 0;
147 this->cond_mask = COND_TR;
148 this->reladdr = NULL;
149 }
150
151 explicit st_dst_reg(st_src_reg reg);
152
153 gl_register_file file; /**< PROGRAM_* from Mesa */
154 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
155 int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
156 GLuint cond_mask:4;
157 /** Register index should be offset by the integer in this reg. */
158 st_src_reg *reladdr;
159 };
160
161 st_src_reg::st_src_reg(st_dst_reg reg)
162 {
163 this->file = reg.file;
164 this->index = reg.index;
165 this->swizzle = SWIZZLE_XYZW;
166 this->negate = 0;
167 this->reladdr = NULL;
168 }
169
170 st_dst_reg::st_dst_reg(st_src_reg reg)
171 {
172 this->file = reg.file;
173 this->index = reg.index;
174 this->writemask = WRITEMASK_XYZW;
175 this->cond_mask = COND_TR;
176 this->reladdr = reg.reladdr;
177 }
178
179 class glsl_to_tgsi_instruction : public exec_node {
180 public:
181 /* Callers of this ralloc-based new need not call delete. It's
182 * easier to just ralloc_free 'ctx' (or any of its ancestors). */
183 static void* operator new(size_t size, void *ctx)
184 {
185 void *node;
186
187 node = rzalloc_size(ctx, size);
188 assert(node != NULL);
189
190 return node;
191 }
192
193 enum prog_opcode op;
194 st_dst_reg dst;
195 st_src_reg src[3];
196 /** Pointer to the ir source this tree came from for debugging */
197 ir_instruction *ir;
198 GLboolean cond_update;
199 bool saturate;
200 int sampler; /**< sampler index */
201 int tex_target; /**< One of TEXTURE_*_INDEX */
202 GLboolean tex_shadow;
203
204 class function_entry *function; /* Set on OPCODE_CAL or OPCODE_BGNSUB */
205 };
206
207 class variable_storage : public exec_node {
208 public:
209 variable_storage(ir_variable *var, gl_register_file file, int index)
210 : file(file), index(index), var(var)
211 {
212 /* empty */
213 }
214
215 gl_register_file file;
216 int index;
217 ir_variable *var; /* variable that maps to this, if any */
218 };
219
220 class function_entry : public exec_node {
221 public:
222 ir_function_signature *sig;
223
224 /**
225 * identifier of this function signature used by the program.
226 *
227 * At the point that Mesa instructions for function calls are
228 * generated, we don't know the address of the first instruction of
229 * the function body. So we make the BranchTarget that is called a
230 * small integer and rewrite them during set_branchtargets().
231 */
232 int sig_id;
233
234 /**
235 * Pointer to first instruction of the function body.
236 *
237 * Set during function body emits after main() is processed.
238 */
239 glsl_to_tgsi_instruction *bgn_inst;
240
241 /**
242 * Index of the first instruction of the function body in actual
243 * Mesa IR.
244 *
245 * Set after convertion from glsl_to_tgsi_instruction to prog_instruction.
246 */
247 int inst;
248
249 /** Storage for the return value. */
250 st_src_reg return_reg;
251 };
252
253 class glsl_to_tgsi_visitor : public ir_visitor {
254 public:
255 glsl_to_tgsi_visitor();
256 ~glsl_to_tgsi_visitor();
257
258 function_entry *current_function;
259
260 struct gl_context *ctx;
261 struct gl_program *prog;
262 struct gl_shader_program *shader_program;
263 struct gl_shader_compiler_options *options;
264
265 int next_temp;
266
267 int num_address_regs;
268 int samplers_used;
269 bool indirect_addr_temps;
270 bool indirect_addr_consts;
271
272 variable_storage *find_variable_storage(ir_variable *var);
273
274 function_entry *get_function_signature(ir_function_signature *sig);
275
276 st_src_reg get_temp(const glsl_type *type);
277 void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);
278
279 st_src_reg st_src_reg_for_float(float val);
280
281 /**
282 * \name Visit methods
283 *
284 * As typical for the visitor pattern, there must be one \c visit method for
285 * each concrete subclass of \c ir_instruction. Virtual base classes within
286 * the hierarchy should not have \c visit methods.
287 */
288 /*@{*/
289 virtual void visit(ir_variable *);
290 virtual void visit(ir_loop *);
291 virtual void visit(ir_loop_jump *);
292 virtual void visit(ir_function_signature *);
293 virtual void visit(ir_function *);
294 virtual void visit(ir_expression *);
295 virtual void visit(ir_swizzle *);
296 virtual void visit(ir_dereference_variable *);
297 virtual void visit(ir_dereference_array *);
298 virtual void visit(ir_dereference_record *);
299 virtual void visit(ir_assignment *);
300 virtual void visit(ir_constant *);
301 virtual void visit(ir_call *);
302 virtual void visit(ir_return *);
303 virtual void visit(ir_discard *);
304 virtual void visit(ir_texture *);
305 virtual void visit(ir_if *);
306 /*@}*/
307
308 st_src_reg result;
309
310 /** List of variable_storage */
311 exec_list variables;
312
313 /** List of function_entry */
314 exec_list function_signatures;
315 int next_signature_id;
316
317 /** List of glsl_to_tgsi_instruction */
318 exec_list instructions;
319
320 glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op);
321
322 glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op,
323 st_dst_reg dst, st_src_reg src0);
324
325 glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op,
326 st_dst_reg dst, st_src_reg src0, st_src_reg src1);
327
328 glsl_to_tgsi_instruction *emit(ir_instruction *ir, enum prog_opcode op,
329 st_dst_reg dst,
330 st_src_reg src0, st_src_reg src1, st_src_reg src2);
331
332 /**
333 * Emit the correct dot-product instruction for the type of arguments
334 */
335 void emit_dp(ir_instruction *ir,
336 st_dst_reg dst,
337 st_src_reg src0,
338 st_src_reg src1,
339 unsigned elements);
340
341 void emit_scalar(ir_instruction *ir, enum prog_opcode op,
342 st_dst_reg dst, st_src_reg src0);
343
344 void emit_scalar(ir_instruction *ir, enum prog_opcode op,
345 st_dst_reg dst, st_src_reg src0, st_src_reg src1);
346
347 void emit_scs(ir_instruction *ir, enum prog_opcode op,
348 st_dst_reg dst, const st_src_reg &src);
349
350 GLboolean try_emit_mad(ir_expression *ir,
351 int mul_operand);
352 GLboolean try_emit_sat(ir_expression *ir);
353
354 void emit_swz(ir_expression *ir);
355
356 bool process_move_condition(ir_rvalue *ir);
357
358 void remove_output_reads(gl_register_file type);
359
360 void rename_temp_register(int index, int new_index);
361 int get_first_temp_read(int index);
362 int get_first_temp_write(int index);
363 int get_last_temp_read(int index);
364 int get_last_temp_write(int index);
365
366 void copy_propagate(void);
367 void eliminate_dead_code(void);
368 void merge_registers(void);
369 void renumber_registers(void);
370
371 void *mem_ctx;
372 };
373
374 static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, NULL);
375
376 static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP);
377
378 static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X);
379
380 static void
381 fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
382
383 static void
384 fail_link(struct gl_shader_program *prog, const char *fmt, ...)
385 {
386 va_list args;
387 va_start(args, fmt);
388 ralloc_vasprintf_append(&prog->InfoLog, fmt, args);
389 va_end(args);
390
391 prog->LinkStatus = GL_FALSE;
392 }
393
394 static int
395 swizzle_for_size(int size)
396 {
397 int size_swizzles[4] = {
398 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
399 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
400 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
401 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
402 };
403
404 assert((size >= 1) && (size <= 4));
405 return size_swizzles[size - 1];
406 }
407
408 glsl_to_tgsi_instruction *
409 glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op,
410 st_dst_reg dst,
411 st_src_reg src0, st_src_reg src1, st_src_reg src2)
412 {
413 glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
414 int num_reladdr = 0, i;
415
416 /* If we have to do relative addressing, we want to load the ARL
417 * reg directly for one of the regs, and preload the other reladdr
418 * sources into temps.
419 */
420 num_reladdr += dst.reladdr != NULL;
421 num_reladdr += src0.reladdr != NULL;
422 num_reladdr += src1.reladdr != NULL;
423 num_reladdr += src2.reladdr != NULL;
424
425 reladdr_to_temp(ir, &src2, &num_reladdr);
426 reladdr_to_temp(ir, &src1, &num_reladdr);
427 reladdr_to_temp(ir, &src0, &num_reladdr);
428
429 if (dst.reladdr) {
430 emit(ir, OPCODE_ARL, address_reg, *dst.reladdr);
431 num_reladdr--;
432 }
433 assert(num_reladdr == 0);
434
435 inst->op = op;
436 inst->dst = dst;
437 inst->src[0] = src0;
438 inst->src[1] = src1;
439 inst->src[2] = src2;
440 inst->ir = ir;
441
442 inst->function = NULL;
443
444 if (op == OPCODE_ARL)
445 this->num_address_regs = 1;
446
447 /* Update indirect addressing status used by TGSI */
448 if (dst.reladdr) {
449 switch(dst.file) {
450 case PROGRAM_TEMPORARY:
451 this->indirect_addr_temps = true;
452 break;
453 case PROGRAM_LOCAL_PARAM:
454 case PROGRAM_ENV_PARAM:
455 case PROGRAM_STATE_VAR:
456 case PROGRAM_NAMED_PARAM:
457 case PROGRAM_CONSTANT:
458 case PROGRAM_UNIFORM:
459 this->indirect_addr_consts = true;
460 break;
461 default:
462 break;
463 }
464 }
465 else {
466 for (i=0; i<3; i++) {
467 if(inst->src[i].reladdr) {
468 switch(dst.file) {
469 case PROGRAM_TEMPORARY:
470 this->indirect_addr_temps = true;
471 break;
472 case PROGRAM_LOCAL_PARAM:
473 case PROGRAM_ENV_PARAM:
474 case PROGRAM_STATE_VAR:
475 case PROGRAM_NAMED_PARAM:
476 case PROGRAM_CONSTANT:
477 case PROGRAM_UNIFORM:
478 this->indirect_addr_consts = true;
479 break;
480 default:
481 break;
482 }
483 }
484 }
485 }
486
487 this->instructions.push_tail(inst);
488
489 return inst;
490 }
491
492
493 glsl_to_tgsi_instruction *
494 glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op,
495 st_dst_reg dst, st_src_reg src0, st_src_reg src1)
496 {
497 return emit(ir, op, dst, src0, src1, undef_src);
498 }
499
500 glsl_to_tgsi_instruction *
501 glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op,
502 st_dst_reg dst, st_src_reg src0)
503 {
504 assert(dst.writemask != 0);
505 return emit(ir, op, dst, src0, undef_src, undef_src);
506 }
507
508 glsl_to_tgsi_instruction *
509 glsl_to_tgsi_visitor::emit(ir_instruction *ir, enum prog_opcode op)
510 {
511 return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
512 }
513
514 void
515 glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
516 st_dst_reg dst, st_src_reg src0, st_src_reg src1,
517 unsigned elements)
518 {
519 static const gl_inst_opcode dot_opcodes[] = {
520 OPCODE_DP2, OPCODE_DP3, OPCODE_DP4
521 };
522
523 emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
524 }
525
526 /**
527 * Emits Mesa scalar opcodes to produce unique answers across channels.
528 *
529 * Some Mesa opcodes are scalar-only, like ARB_fp/vp. The src X
530 * channel determines the result across all channels. So to do a vec4
531 * of this operation, we want to emit a scalar per source channel used
532 * to produce dest channels.
533 */
534 void
535 glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
536 st_dst_reg dst,
537 st_src_reg orig_src0, st_src_reg orig_src1)
538 {
539 int i, j;
540 int done_mask = ~dst.writemask;
541
542 /* Mesa RCP is a scalar operation splatting results to all channels,
543 * like ARB_fp/vp. So emit as many RCPs as necessary to cover our
544 * dst channels.
545 */
546 for (i = 0; i < 4; i++) {
547 GLuint this_mask = (1 << i);
548 glsl_to_tgsi_instruction *inst;
549 st_src_reg src0 = orig_src0;
550 st_src_reg src1 = orig_src1;
551
552 if (done_mask & this_mask)
553 continue;
554
555 GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
556 GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
557 for (j = i + 1; j < 4; j++) {
558 /* If there is another enabled component in the destination that is
559 * derived from the same inputs, generate its value on this pass as
560 * well.
561 */
562 if (!(done_mask & (1 << j)) &&
563 GET_SWZ(src0.swizzle, j) == src0_swiz &&
564 GET_SWZ(src1.swizzle, j) == src1_swiz) {
565 this_mask |= (1 << j);
566 }
567 }
568 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
569 src0_swiz, src0_swiz);
570 src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
571 src1_swiz, src1_swiz);
572
573 inst = emit(ir, op, dst, src0, src1);
574 inst->dst.writemask = this_mask;
575 done_mask |= this_mask;
576 }
577 }
578
579 void
580 glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
581 st_dst_reg dst, st_src_reg src0)
582 {
583 st_src_reg undef = undef_src;
584
585 undef.swizzle = SWIZZLE_XXXX;
586
587 emit_scalar(ir, op, dst, src0, undef);
588 }
589
590 /**
591 * Emit an OPCODE_SCS instruction
592 *
593 * The \c SCS opcode functions a bit differently than the other Mesa (or
594 * ARB_fragment_program) opcodes. Instead of splatting its result across all
595 * four components of the destination, it writes one value to the \c x
596 * component and another value to the \c y component.
597 *
598 * \param ir IR instruction being processed
599 * \param op Either \c OPCODE_SIN or \c OPCODE_COS depending on which
600 * value is desired.
601 * \param dst Destination register
602 * \param src Source register
603 */
604 void
605 glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, enum prog_opcode op,
606 st_dst_reg dst,
607 const st_src_reg &src)
608 {
609 /* Vertex programs cannot use the SCS opcode.
610 */
611 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) {
612 emit_scalar(ir, op, dst, src);
613 return;
614 }
615
616 const unsigned component = (op == OPCODE_SIN) ? 0 : 1;
617 const unsigned scs_mask = (1U << component);
618 int done_mask = ~dst.writemask;
619 st_src_reg tmp;
620
621 assert(op == OPCODE_SIN || op == OPCODE_COS);
622
623 /* If there are compnents in the destination that differ from the component
624 * that will be written by the SCS instrution, we'll need a temporary.
625 */
626 if (scs_mask != unsigned(dst.writemask)) {
627 tmp = get_temp(glsl_type::vec4_type);
628 }
629
630 for (unsigned i = 0; i < 4; i++) {
631 unsigned this_mask = (1U << i);
632 st_src_reg src0 = src;
633
634 if ((done_mask & this_mask) != 0)
635 continue;
636
637 /* The source swizzle specified which component of the source generates
638 * sine / cosine for the current component in the destination. The SCS
639 * instruction requires that this value be swizzle to the X component.
640 * Replace the current swizzle with a swizzle that puts the source in
641 * the X component.
642 */
643 unsigned src0_swiz = GET_SWZ(src.swizzle, i);
644
645 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
646 src0_swiz, src0_swiz);
647 for (unsigned j = i + 1; j < 4; j++) {
648 /* If there is another enabled component in the destination that is
649 * derived from the same inputs, generate its value on this pass as
650 * well.
651 */
652 if (!(done_mask & (1 << j)) &&
653 GET_SWZ(src0.swizzle, j) == src0_swiz) {
654 this_mask |= (1 << j);
655 }
656 }
657
658 if (this_mask != scs_mask) {
659 glsl_to_tgsi_instruction *inst;
660 st_dst_reg tmp_dst = st_dst_reg(tmp);
661
662 /* Emit the SCS instruction.
663 */
664 inst = emit(ir, OPCODE_SCS, tmp_dst, src0);
665 inst->dst.writemask = scs_mask;
666
667 /* Move the result of the SCS instruction to the desired location in
668 * the destination.
669 */
670 tmp.swizzle = MAKE_SWIZZLE4(component, component,
671 component, component);
672 inst = emit(ir, OPCODE_SCS, dst, tmp);
673 inst->dst.writemask = this_mask;
674 } else {
675 /* Emit the SCS instruction to write directly to the destination.
676 */
677 glsl_to_tgsi_instruction *inst = emit(ir, OPCODE_SCS, dst, src0);
678 inst->dst.writemask = scs_mask;
679 }
680
681 done_mask |= this_mask;
682 }
683 }
684
685 struct st_src_reg
686 glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
687 {
688 st_src_reg src(PROGRAM_CONSTANT, -1, NULL);
689
690 src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
691 &val, 1, &src.swizzle);
692
693 return src;
694 }
695
696 static int
697 type_size(const struct glsl_type *type)
698 {
699 unsigned int i;
700 int size;
701
702 switch (type->base_type) {
703 case GLSL_TYPE_UINT:
704 case GLSL_TYPE_INT:
705 case GLSL_TYPE_FLOAT:
706 case GLSL_TYPE_BOOL:
707 if (type->is_matrix()) {
708 return type->matrix_columns;
709 } else {
710 /* Regardless of size of vector, it gets a vec4. This is bad
711 * packing for things like floats, but otherwise arrays become a
712 * mess. Hopefully a later pass over the code can pack scalars
713 * down if appropriate.
714 */
715 return 1;
716 }
717 case GLSL_TYPE_ARRAY:
718 assert(type->length > 0);
719 return type_size(type->fields.array) * type->length;
720 case GLSL_TYPE_STRUCT:
721 size = 0;
722 for (i = 0; i < type->length; i++) {
723 size += type_size(type->fields.structure[i].type);
724 }
725 return size;
726 case GLSL_TYPE_SAMPLER:
727 /* Samplers take up one slot in UNIFORMS[], but they're baked in
728 * at link time.
729 */
730 return 1;
731 default:
732 assert(0);
733 return 0;
734 }
735 }
736
737 /**
738 * In the initial pass of codegen, we assign temporary numbers to
739 * intermediate results. (not SSA -- variable assignments will reuse
740 * storage). Actual register allocation for the Mesa VM occurs in a
741 * pass over the Mesa IR later.
742 */
743 st_src_reg
744 glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
745 {
746 st_src_reg src;
747 int swizzle[4];
748 int i;
749
750 src.file = PROGRAM_TEMPORARY;
751 src.index = next_temp;
752 src.reladdr = NULL;
753 next_temp += type_size(type);
754
755 if (type->is_array() || type->is_record()) {
756 src.swizzle = SWIZZLE_NOOP;
757 } else {
758 for (i = 0; i < type->vector_elements; i++)
759 swizzle[i] = i;
760 for (; i < 4; i++)
761 swizzle[i] = type->vector_elements - 1;
762 src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1],
763 swizzle[2], swizzle[3]);
764 }
765 src.negate = 0;
766
767 return src;
768 }
769
770 variable_storage *
771 glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
772 {
773
774 variable_storage *entry;
775
776 foreach_iter(exec_list_iterator, iter, this->variables) {
777 entry = (variable_storage *)iter.get();
778
779 if (entry->var == var)
780 return entry;
781 }
782
783 return NULL;
784 }
785
786 void
787 glsl_to_tgsi_visitor::visit(ir_variable *ir)
788 {
789 if (strcmp(ir->name, "gl_FragCoord") == 0) {
790 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
791
792 fp->OriginUpperLeft = ir->origin_upper_left;
793 fp->PixelCenterInteger = ir->pixel_center_integer;
794
795 } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
796 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
797 switch (ir->depth_layout) {
798 case ir_depth_layout_none:
799 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE;
800 break;
801 case ir_depth_layout_any:
802 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY;
803 break;
804 case ir_depth_layout_greater:
805 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER;
806 break;
807 case ir_depth_layout_less:
808 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS;
809 break;
810 case ir_depth_layout_unchanged:
811 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED;
812 break;
813 default:
814 assert(0);
815 break;
816 }
817 }
818
819 if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
820 unsigned int i;
821 const ir_state_slot *const slots = ir->state_slots;
822 assert(ir->state_slots != NULL);
823
824 /* Check if this statevar's setup in the STATE file exactly
825 * matches how we'll want to reference it as a
826 * struct/array/whatever. If not, then we need to move it into
827 * temporary storage and hope that it'll get copy-propagated
828 * out.
829 */
830 for (i = 0; i < ir->num_state_slots; i++) {
831 if (slots[i].swizzle != SWIZZLE_XYZW) {
832 break;
833 }
834 }
835
836 struct variable_storage *storage;
837 st_dst_reg dst;
838 if (i == ir->num_state_slots) {
839 /* We'll set the index later. */
840 storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
841 this->variables.push_tail(storage);
842
843 dst = undef_dst;
844 } else {
845 /* The variable_storage constructor allocates slots based on the size
846 * of the type. However, this had better match the number of state
847 * elements that we're going to copy into the new temporary.
848 */
849 assert((int) ir->num_state_slots == type_size(ir->type));
850
851 storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
852 this->next_temp);
853 this->variables.push_tail(storage);
854 this->next_temp += type_size(ir->type);
855
856 dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index, NULL));
857 }
858
859
860 for (unsigned int i = 0; i < ir->num_state_slots; i++) {
861 int index = _mesa_add_state_reference(this->prog->Parameters,
862 (gl_state_index *)slots[i].tokens);
863
864 if (storage->file == PROGRAM_STATE_VAR) {
865 if (storage->index == -1) {
866 storage->index = index;
867 } else {
868 assert(index == storage->index + (int)i);
869 }
870 } else {
871 st_src_reg src(PROGRAM_STATE_VAR, index, NULL);
872 src.swizzle = slots[i].swizzle;
873 emit(ir, OPCODE_MOV, dst, src);
874 /* even a float takes up a whole vec4 reg in a struct/array. */
875 dst.index++;
876 }
877 }
878
879 if (storage->file == PROGRAM_TEMPORARY &&
880 dst.index != storage->index + (int) ir->num_state_slots) {
881 fail_link(this->shader_program,
882 "failed to load builtin uniform `%s' (%d/%d regs loaded)\n",
883 ir->name, dst.index - storage->index,
884 type_size(ir->type));
885 }
886 }
887 }
888
889 void
890 glsl_to_tgsi_visitor::visit(ir_loop *ir)
891 {
892 ir_dereference_variable *counter = NULL;
893
894 if (ir->counter != NULL)
895 counter = new(ir) ir_dereference_variable(ir->counter);
896
897 if (ir->from != NULL) {
898 assert(ir->counter != NULL);
899
900 ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);
901
902 a->accept(this);
903 delete a;
904 }
905
906 emit(NULL, OPCODE_BGNLOOP);
907
908 if (ir->to) {
909 ir_expression *e =
910 new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
911 counter, ir->to);
912 ir_if *if_stmt = new(ir) ir_if(e);
913
914 ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
915
916 if_stmt->then_instructions.push_tail(brk);
917
918 if_stmt->accept(this);
919
920 delete if_stmt;
921 delete e;
922 delete brk;
923 }
924
925 visit_exec_list(&ir->body_instructions, this);
926
927 if (ir->increment) {
928 ir_expression *e =
929 new(ir) ir_expression(ir_binop_add, counter->type,
930 counter, ir->increment);
931
932 ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);
933
934 a->accept(this);
935 delete a;
936 delete e;
937 }
938
939 emit(NULL, OPCODE_ENDLOOP);
940 }
941
942 void
943 glsl_to_tgsi_visitor::visit(ir_loop_jump *ir)
944 {
945 switch (ir->mode) {
946 case ir_loop_jump::jump_break:
947 emit(NULL, OPCODE_BRK);
948 break;
949 case ir_loop_jump::jump_continue:
950 emit(NULL, OPCODE_CONT);
951 break;
952 }
953 }
954
955
956 void
957 glsl_to_tgsi_visitor::visit(ir_function_signature *ir)
958 {
959 assert(0);
960 (void)ir;
961 }
962
963 void
964 glsl_to_tgsi_visitor::visit(ir_function *ir)
965 {
966 /* Ignore function bodies other than main() -- we shouldn't see calls to
967 * them since they should all be inlined before we get to glsl_to_tgsi.
968 */
969 if (strcmp(ir->name, "main") == 0) {
970 const ir_function_signature *sig;
971 exec_list empty;
972
973 sig = ir->matching_signature(&empty);
974
975 assert(sig);
976
977 foreach_iter(exec_list_iterator, iter, sig->body) {
978 ir_instruction *ir = (ir_instruction *)iter.get();
979
980 ir->accept(this);
981 }
982 }
983 }
984
985 GLboolean
986 glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
987 {
988 int nonmul_operand = 1 - mul_operand;
989 st_src_reg a, b, c;
990
991 ir_expression *expr = ir->operands[mul_operand]->as_expression();
992 if (!expr || expr->operation != ir_binop_mul)
993 return false;
994
995 expr->operands[0]->accept(this);
996 a = this->result;
997 expr->operands[1]->accept(this);
998 b = this->result;
999 ir->operands[nonmul_operand]->accept(this);
1000 c = this->result;
1001
1002 this->result = get_temp(ir->type);
1003 emit(ir, OPCODE_MAD, st_dst_reg(this->result), a, b, c);
1004
1005 return true;
1006 }
1007
1008 GLboolean
1009 glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
1010 {
1011 /* Saturates were only introduced to vertex programs in
1012 * NV_vertex_program3, so don't give them to drivers in the VP.
1013 */
1014 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB)
1015 return false;
1016
1017 ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
1018 if (!sat_src)
1019 return false;
1020
1021 sat_src->accept(this);
1022 st_src_reg src = this->result;
1023
1024 this->result = get_temp(ir->type);
1025 glsl_to_tgsi_instruction *inst;
1026 inst = emit(ir, OPCODE_MOV, st_dst_reg(this->result), src);
1027 inst->saturate = true;
1028
1029 return true;
1030 }
1031
1032 void
1033 glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
1034 st_src_reg *reg, int *num_reladdr)
1035 {
1036 if (!reg->reladdr)
1037 return;
1038
1039 emit(ir, OPCODE_ARL, address_reg, *reg->reladdr);
1040
1041 if (*num_reladdr != 1) {
1042 st_src_reg temp = get_temp(glsl_type::vec4_type);
1043
1044 emit(ir, OPCODE_MOV, st_dst_reg(temp), *reg);
1045 *reg = temp;
1046 }
1047
1048 (*num_reladdr)--;
1049 }
1050
1051 void
1052 glsl_to_tgsi_visitor::emit_swz(ir_expression *ir)
1053 {
1054 /* Assume that the vector operator is in a form compatible with OPCODE_SWZ.
1055 * This means that each of the operands is either an immediate value of -1,
1056 * 0, or 1, or is a component from one source register (possibly with
1057 * negation).
1058 */
1059 uint8_t components[4] = { 0 };
1060 bool negate[4] = { false };
1061 ir_variable *var = NULL;
1062
1063 for (unsigned i = 0; i < ir->type->vector_elements; i++) {
1064 ir_rvalue *op = ir->operands[i];
1065
1066 assert(op->type->is_scalar());
1067
1068 while (op != NULL) {
1069 switch (op->ir_type) {
1070 case ir_type_constant: {
1071
1072 assert(op->type->is_scalar());
1073
1074 const ir_constant *const c = op->as_constant();
1075 if (c->is_one()) {
1076 components[i] = SWIZZLE_ONE;
1077 } else if (c->is_zero()) {
1078 components[i] = SWIZZLE_ZERO;
1079 } else if (c->is_negative_one()) {
1080 components[i] = SWIZZLE_ONE;
1081 negate[i] = true;
1082 } else {
1083 assert(!"SWZ constant must be 0.0 or 1.0.");
1084 }
1085
1086 op = NULL;
1087 break;
1088 }
1089
1090 case ir_type_dereference_variable: {
1091 ir_dereference_variable *const deref =
1092 (ir_dereference_variable *) op;
1093
1094 assert((var == NULL) || (deref->var == var));
1095 components[i] = SWIZZLE_X;
1096 var = deref->var;
1097 op = NULL;
1098 break;
1099 }
1100
1101 case ir_type_expression: {
1102 ir_expression *const expr = (ir_expression *) op;
1103
1104 assert(expr->operation == ir_unop_neg);
1105 negate[i] = true;
1106
1107 op = expr->operands[0];
1108 break;
1109 }
1110
1111 case ir_type_swizzle: {
1112 ir_swizzle *const swiz = (ir_swizzle *) op;
1113
1114 components[i] = swiz->mask.x;
1115 op = swiz->val;
1116 break;
1117 }
1118
1119 default:
1120 assert(!"Should not get here.");
1121 return;
1122 }
1123 }
1124 }
1125
1126 assert(var != NULL);
1127
1128 ir_dereference_variable *const deref =
1129 new(mem_ctx) ir_dereference_variable(var);
1130
1131 this->result.file = PROGRAM_UNDEFINED;
1132 deref->accept(this);
1133 if (this->result.file == PROGRAM_UNDEFINED) {
1134 ir_print_visitor v;
1135 printf("Failed to get tree for expression operand:\n");
1136 deref->accept(&v);
1137 exit(1);
1138 }
1139
1140 st_src_reg src;
1141
1142 src = this->result;
1143 src.swizzle = MAKE_SWIZZLE4(components[0],
1144 components[1],
1145 components[2],
1146 components[3]);
1147 src.negate = ((unsigned(negate[0]) << 0)
1148 | (unsigned(negate[1]) << 1)
1149 | (unsigned(negate[2]) << 2)
1150 | (unsigned(negate[3]) << 3));
1151
1152 /* Storage for our result. Ideally for an assignment we'd be using the
1153 * actual storage for the result here, instead.
1154 */
1155 const st_src_reg result_src = get_temp(ir->type);
1156 st_dst_reg result_dst = st_dst_reg(result_src);
1157
1158 /* Limit writes to the channels that will be used by result_src later.
1159 * This does limit this temp's use as a temporary for multi-instruction
1160 * sequences.
1161 */
1162 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1163
1164 emit(ir, OPCODE_SWZ, result_dst, src);
1165 this->result = result_src;
1166 }
1167
1168 void
1169 glsl_to_tgsi_visitor::visit(ir_expression *ir)
1170 {
1171 unsigned int operand;
1172 st_src_reg op[Elements(ir->operands)];
1173 st_src_reg result_src;
1174 st_dst_reg result_dst;
1175
1176 /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c)
1177 */
1178 if (ir->operation == ir_binop_add) {
1179 if (try_emit_mad(ir, 1))
1180 return;
1181 if (try_emit_mad(ir, 0))
1182 return;
1183 }
1184 if (try_emit_sat(ir))
1185 return;
1186
1187 if (ir->operation == ir_quadop_vector) {
1188 this->emit_swz(ir);
1189 return;
1190 }
1191
1192 for (operand = 0; operand < ir->get_num_operands(); operand++) {
1193 this->result.file = PROGRAM_UNDEFINED;
1194 ir->operands[operand]->accept(this);
1195 if (this->result.file == PROGRAM_UNDEFINED) {
1196 ir_print_visitor v;
1197 printf("Failed to get tree for expression operand:\n");
1198 ir->operands[operand]->accept(&v);
1199 exit(1);
1200 }
1201 op[operand] = this->result;
1202
1203 /* Matrix expression operands should have been broken down to vector
1204 * operations already.
1205 */
1206 assert(!ir->operands[operand]->type->is_matrix());
1207 }
1208
1209 int vector_elements = ir->operands[0]->type->vector_elements;
1210 if (ir->operands[1]) {
1211 vector_elements = MAX2(vector_elements,
1212 ir->operands[1]->type->vector_elements);
1213 }
1214
1215 this->result.file = PROGRAM_UNDEFINED;
1216
1217 /* Storage for our result. Ideally for an assignment we'd be using
1218 * the actual storage for the result here, instead.
1219 */
1220 result_src = get_temp(ir->type);
1221 /* convenience for the emit functions below. */
1222 result_dst = st_dst_reg(result_src);
1223 /* Limit writes to the channels that will be used by result_src later.
1224 * This does limit this temp's use as a temporary for multi-instruction
1225 * sequences.
1226 */
1227 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1228
1229 switch (ir->operation) {
1230 case ir_unop_logic_not:
1231 emit(ir, OPCODE_SEQ, result_dst, op[0], st_src_reg_for_float(0.0));
1232 break;
1233 case ir_unop_neg:
1234 op[0].negate = ~op[0].negate;
1235 result_src = op[0];
1236 break;
1237 case ir_unop_abs:
1238 emit(ir, OPCODE_ABS, result_dst, op[0]);
1239 break;
1240 case ir_unop_sign:
1241 emit(ir, OPCODE_SSG, result_dst, op[0]);
1242 break;
1243 case ir_unop_rcp:
1244 emit_scalar(ir, OPCODE_RCP, result_dst, op[0]);
1245 break;
1246
1247 case ir_unop_exp2:
1248 emit_scalar(ir, OPCODE_EX2, result_dst, op[0]);
1249 break;
1250 case ir_unop_exp:
1251 case ir_unop_log:
1252 assert(!"not reached: should be handled by ir_explog_to_explog2");
1253 break;
1254 case ir_unop_log2:
1255 emit_scalar(ir, OPCODE_LG2, result_dst, op[0]);
1256 break;
1257 case ir_unop_sin:
1258 emit_scalar(ir, OPCODE_SIN, result_dst, op[0]);
1259 break;
1260 case ir_unop_cos:
1261 emit_scalar(ir, OPCODE_COS, result_dst, op[0]);
1262 break;
1263 case ir_unop_sin_reduced:
1264 emit_scs(ir, OPCODE_SIN, result_dst, op[0]);
1265 break;
1266 case ir_unop_cos_reduced:
1267 emit_scs(ir, OPCODE_COS, result_dst, op[0]);
1268 break;
1269
1270 case ir_unop_dFdx:
1271 emit(ir, OPCODE_DDX, result_dst, op[0]);
1272 break;
1273 case ir_unop_dFdy:
1274 emit(ir, OPCODE_DDY, result_dst, op[0]);
1275 break;
1276
1277 case ir_unop_noise: {
1278 const enum prog_opcode opcode =
1279 prog_opcode(OPCODE_NOISE1
1280 + (ir->operands[0]->type->vector_elements) - 1);
1281 assert((opcode >= OPCODE_NOISE1) && (opcode <= OPCODE_NOISE4));
1282
1283 emit(ir, opcode, result_dst, op[0]);
1284 break;
1285 }
1286
1287 case ir_binop_add:
1288 emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
1289 break;
1290 case ir_binop_sub:
1291 emit(ir, OPCODE_SUB, result_dst, op[0], op[1]);
1292 break;
1293
1294 case ir_binop_mul:
1295 emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
1296 break;
1297 case ir_binop_div:
1298 assert(!"not reached: should be handled by ir_div_to_mul_rcp");
1299 case ir_binop_mod:
1300 assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
1301 break;
1302
1303 case ir_binop_less:
1304 emit(ir, OPCODE_SLT, result_dst, op[0], op[1]);
1305 break;
1306 case ir_binop_greater:
1307 emit(ir, OPCODE_SGT, result_dst, op[0], op[1]);
1308 break;
1309 case ir_binop_lequal:
1310 emit(ir, OPCODE_SLE, result_dst, op[0], op[1]);
1311 break;
1312 case ir_binop_gequal:
1313 emit(ir, OPCODE_SGE, result_dst, op[0], op[1]);
1314 break;
1315 case ir_binop_equal:
1316 emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
1317 break;
1318 case ir_binop_nequal:
1319 emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
1320 break;
1321 case ir_binop_all_equal:
1322 /* "==" operator producing a scalar boolean. */
1323 if (ir->operands[0]->type->is_vector() ||
1324 ir->operands[1]->type->is_vector()) {
1325 st_src_reg temp = get_temp(glsl_type::vec4_type);
1326 emit(ir, OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
1327 emit_dp(ir, result_dst, temp, temp, vector_elements);
1328 emit(ir, OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0));
1329 } else {
1330 emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
1331 }
1332 break;
1333 case ir_binop_any_nequal:
1334 /* "!=" operator producing a scalar boolean. */
1335 if (ir->operands[0]->type->is_vector() ||
1336 ir->operands[1]->type->is_vector()) {
1337 st_src_reg temp = get_temp(glsl_type::vec4_type);
1338 emit(ir, OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
1339 emit_dp(ir, result_dst, temp, temp, vector_elements);
1340 emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
1341 } else {
1342 emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
1343 }
1344 break;
1345
1346 case ir_unop_any:
1347 assert(ir->operands[0]->type->is_vector());
1348 emit_dp(ir, result_dst, op[0], op[0],
1349 ir->operands[0]->type->vector_elements);
1350 emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
1351 break;
1352
1353 case ir_binop_logic_xor:
1354 emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
1355 break;
1356
1357 case ir_binop_logic_or:
1358 /* This could be a saturated add and skip the SNE. */
1359 emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
1360 emit(ir, OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
1361 break;
1362
1363 case ir_binop_logic_and:
1364 /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
1365 emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
1366 break;
1367
1368 case ir_binop_dot:
1369 assert(ir->operands[0]->type->is_vector());
1370 assert(ir->operands[0]->type == ir->operands[1]->type);
1371 emit_dp(ir, result_dst, op[0], op[1],
1372 ir->operands[0]->type->vector_elements);
1373 break;
1374
1375 case ir_unop_sqrt:
1376 /* sqrt(x) = x * rsq(x). */
1377 emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]);
1378 emit(ir, OPCODE_MUL, result_dst, result_src, op[0]);
1379 /* For incoming channels <= 0, set the result to 0. */
1380 op[0].negate = ~op[0].negate;
1381 emit(ir, OPCODE_CMP, result_dst,
1382 op[0], result_src, st_src_reg_for_float(0.0));
1383 break;
1384 case ir_unop_rsq:
1385 emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]);
1386 break;
1387 case ir_unop_i2f:
1388 case ir_unop_b2f:
1389 case ir_unop_b2i:
1390 /* Mesa IR lacks types, ints are stored as truncated floats. */
1391 result_src = op[0];
1392 break;
1393 case ir_unop_f2i:
1394 emit(ir, OPCODE_TRUNC, result_dst, op[0]);
1395 break;
1396 case ir_unop_f2b:
1397 case ir_unop_i2b:
1398 emit(ir, OPCODE_SNE, result_dst,
1399 op[0], st_src_reg_for_float(0.0));
1400 break;
1401 case ir_unop_trunc:
1402 emit(ir, OPCODE_TRUNC, result_dst, op[0]);
1403 break;
1404 case ir_unop_ceil:
1405 op[0].negate = ~op[0].negate;
1406 emit(ir, OPCODE_FLR, result_dst, op[0]);
1407 result_src.negate = ~result_src.negate;
1408 break;
1409 case ir_unop_floor:
1410 emit(ir, OPCODE_FLR, result_dst, op[0]);
1411 break;
1412 case ir_unop_fract:
1413 emit(ir, OPCODE_FRC, result_dst, op[0]);
1414 break;
1415
1416 case ir_binop_min:
1417 emit(ir, OPCODE_MIN, result_dst, op[0], op[1]);
1418 break;
1419 case ir_binop_max:
1420 emit(ir, OPCODE_MAX, result_dst, op[0], op[1]);
1421 break;
1422 case ir_binop_pow:
1423 emit_scalar(ir, OPCODE_POW, result_dst, op[0], op[1]);
1424 break;
1425
1426 case ir_unop_bit_not:
1427 case ir_unop_u2f:
1428 case ir_binop_lshift:
1429 case ir_binop_rshift:
1430 case ir_binop_bit_and:
1431 case ir_binop_bit_xor:
1432 case ir_binop_bit_or:
1433 case ir_unop_round_even:
1434 assert(!"GLSL 1.30 features unsupported");
1435 break;
1436
1437 case ir_quadop_vector:
1438 /* This operation should have already been handled.
1439 */
1440 assert(!"Should not get here.");
1441 break;
1442 }
1443
1444 this->result = result_src;
1445 }
1446
1447
1448 void
1449 glsl_to_tgsi_visitor::visit(ir_swizzle *ir)
1450 {
1451 st_src_reg src;
1452 int i;
1453 int swizzle[4];
1454
1455 /* Note that this is only swizzles in expressions, not those on the left
1456 * hand side of an assignment, which do write masking. See ir_assignment
1457 * for that.
1458 */
1459
1460 ir->val->accept(this);
1461 src = this->result;
1462 assert(src.file != PROGRAM_UNDEFINED);
1463
1464 for (i = 0; i < 4; i++) {
1465 if (i < ir->type->vector_elements) {
1466 switch (i) {
1467 case 0:
1468 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
1469 break;
1470 case 1:
1471 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
1472 break;
1473 case 2:
1474 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
1475 break;
1476 case 3:
1477 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
1478 break;
1479 }
1480 } else {
1481 /* If the type is smaller than a vec4, replicate the last
1482 * channel out.
1483 */
1484 swizzle[i] = swizzle[ir->type->vector_elements - 1];
1485 }
1486 }
1487
1488 src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1489
1490 this->result = src;
1491 }
1492
1493 void
1494 glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
1495 {
1496 variable_storage *entry = find_variable_storage(ir->var);
1497 ir_variable *var = ir->var;
1498
1499 if (!entry) {
1500 switch (var->mode) {
1501 case ir_var_uniform:
1502 entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
1503 var->location);
1504 this->variables.push_tail(entry);
1505 break;
1506 case ir_var_in:
1507 case ir_var_inout:
1508 /* The linker assigns locations for varyings and attributes,
1509 * including deprecated builtins (like gl_Color), user-assign
1510 * generic attributes (glBindVertexLocation), and
1511 * user-defined varyings.
1512 *
1513 * FINISHME: We would hit this path for function arguments. Fix!
1514 */
1515 assert(var->location != -1);
1516 entry = new(mem_ctx) variable_storage(var,
1517 PROGRAM_INPUT,
1518 var->location);
1519 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
1520 var->location >= VERT_ATTRIB_GENERIC0) {
1521 _mesa_add_attribute(this->prog->Attributes,
1522 var->name,
1523 _mesa_sizeof_glsl_type(var->type->gl_type),
1524 var->type->gl_type,
1525 var->location - VERT_ATTRIB_GENERIC0);
1526 }
1527 break;
1528 case ir_var_out:
1529 assert(var->location != -1);
1530 entry = new(mem_ctx) variable_storage(var,
1531 PROGRAM_OUTPUT,
1532 var->location);
1533 break;
1534 case ir_var_system_value:
1535 entry = new(mem_ctx) variable_storage(var,
1536 PROGRAM_SYSTEM_VALUE,
1537 var->location);
1538 break;
1539 case ir_var_auto:
1540 case ir_var_temporary:
1541 entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY,
1542 this->next_temp);
1543 this->variables.push_tail(entry);
1544
1545 next_temp += type_size(var->type);
1546 break;
1547 }
1548
1549 if (!entry) {
1550 printf("Failed to make storage for %s\n", var->name);
1551 exit(1);
1552 }
1553 }
1554
1555 this->result = st_src_reg(entry->file, entry->index, var->type);
1556 }
1557
1558 void
1559 glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
1560 {
1561 ir_constant *index;
1562 st_src_reg src;
1563 int element_size = type_size(ir->type);
1564
1565 index = ir->array_index->constant_expression_value();
1566
1567 ir->array->accept(this);
1568 src = this->result;
1569
1570 if (index) {
1571 src.index += index->value.i[0] * element_size;
1572 } else {
1573 st_src_reg array_base = this->result;
1574 /* Variable index array dereference. It eats the "vec4" of the
1575 * base of the array and an index that offsets the Mesa register
1576 * index.
1577 */
1578 ir->array_index->accept(this);
1579
1580 st_src_reg index_reg;
1581
1582 if (element_size == 1) {
1583 index_reg = this->result;
1584 } else {
1585 index_reg = get_temp(glsl_type::float_type);
1586
1587 emit(ir, OPCODE_MUL, st_dst_reg(index_reg),
1588 this->result, st_src_reg_for_float(element_size));
1589 }
1590
1591 src.reladdr = ralloc(mem_ctx, st_src_reg);
1592 memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1593 }
1594
1595 /* If the type is smaller than a vec4, replicate the last channel out. */
1596 if (ir->type->is_scalar() || ir->type->is_vector())
1597 src.swizzle = swizzle_for_size(ir->type->vector_elements);
1598 else
1599 src.swizzle = SWIZZLE_NOOP;
1600
1601 this->result = src;
1602 }
1603
1604 void
1605 glsl_to_tgsi_visitor::visit(ir_dereference_record *ir)
1606 {
1607 unsigned int i;
1608 const glsl_type *struct_type = ir->record->type;
1609 int offset = 0;
1610
1611 ir->record->accept(this);
1612
1613 for (i = 0; i < struct_type->length; i++) {
1614 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1615 break;
1616 offset += type_size(struct_type->fields.structure[i].type);
1617 }
1618
1619 /* If the type is smaller than a vec4, replicate the last channel out. */
1620 if (ir->type->is_scalar() || ir->type->is_vector())
1621 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1622 else
1623 this->result.swizzle = SWIZZLE_NOOP;
1624
1625 this->result.index += offset;
1626 }
1627
1628 /**
1629 * We want to be careful in assignment setup to hit the actual storage
1630 * instead of potentially using a temporary like we might with the
1631 * ir_dereference handler.
1632 */
1633 static st_dst_reg
1634 get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v)
1635 {
1636 /* The LHS must be a dereference. If the LHS is a variable indexed array
1637 * access of a vector, it must be separated into a series conditional moves
1638 * before reaching this point (see ir_vec_index_to_cond_assign).
1639 */
1640 assert(ir->as_dereference());
1641 ir_dereference_array *deref_array = ir->as_dereference_array();
1642 if (deref_array) {
1643 assert(!deref_array->array->type->is_vector());
1644 }
1645
1646 /* Use the rvalue deref handler for the most part. We'll ignore
1647 * swizzles in it and write swizzles using writemask, though.
1648 */
1649 ir->accept(v);
1650 return st_dst_reg(v->result);
1651 }
1652
1653 /**
1654 * Process the condition of a conditional assignment
1655 *
1656 * Examines the condition of a conditional assignment to generate the optimal
1657 * first operand of a \c CMP instruction. If the condition is a relational
1658 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
1659 * used as the source for the \c CMP instruction. Otherwise the comparison
1660 * is processed to a boolean result, and the boolean result is used as the
1661 * operand to the CMP instruction.
1662 */
1663 bool
1664 glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir)
1665 {
1666 ir_rvalue *src_ir = ir;
1667 bool negate = true;
1668 bool switch_order = false;
1669
1670 ir_expression *const expr = ir->as_expression();
1671 if ((expr != NULL) && (expr->get_num_operands() == 2)) {
1672 bool zero_on_left = false;
1673
1674 if (expr->operands[0]->is_zero()) {
1675 src_ir = expr->operands[1];
1676 zero_on_left = true;
1677 } else if (expr->operands[1]->is_zero()) {
1678 src_ir = expr->operands[0];
1679 zero_on_left = false;
1680 }
1681
1682 /* a is - 0 + - 0 +
1683 * (a < 0) T F F ( a < 0) T F F
1684 * (0 < a) F F T (-a < 0) F F T
1685 * (a <= 0) T T F (-a < 0) F F T (swap order of other operands)
1686 * (0 <= a) F T T ( a < 0) T F F (swap order of other operands)
1687 * (a > 0) F F T (-a < 0) F F T
1688 * (0 > a) T F F ( a < 0) T F F
1689 * (a >= 0) F T T ( a < 0) T F F (swap order of other operands)
1690 * (0 >= a) T T F (-a < 0) F F T (swap order of other operands)
1691 *
1692 * Note that exchanging the order of 0 and 'a' in the comparison simply
1693 * means that the value of 'a' should be negated.
1694 */
1695 if (src_ir != ir) {
1696 switch (expr->operation) {
1697 case ir_binop_less:
1698 switch_order = false;
1699 negate = zero_on_left;
1700 break;
1701
1702 case ir_binop_greater:
1703 switch_order = false;
1704 negate = !zero_on_left;
1705 break;
1706
1707 case ir_binop_lequal:
1708 switch_order = true;
1709 negate = !zero_on_left;
1710 break;
1711
1712 case ir_binop_gequal:
1713 switch_order = true;
1714 negate = zero_on_left;
1715 break;
1716
1717 default:
1718 /* This isn't the right kind of comparison afterall, so make sure
1719 * the whole condition is visited.
1720 */
1721 src_ir = ir;
1722 break;
1723 }
1724 }
1725 }
1726
1727 src_ir->accept(this);
1728
1729 /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
1730 * condition we produced is 0.0 or 1.0. By flipping the sign, we can
1731 * choose which value OPCODE_CMP produces without an extra instruction
1732 * computing the condition.
1733 */
1734 if (negate)
1735 this->result.negate = ~this->result.negate;
1736
1737 return switch_order;
1738 }
1739
1740 void
1741 glsl_to_tgsi_visitor::visit(ir_assignment *ir)
1742 {
1743 st_dst_reg l;
1744 st_src_reg r;
1745 int i;
1746
1747 ir->rhs->accept(this);
1748 r = this->result;
1749
1750 l = get_assignment_lhs(ir->lhs, this);
1751
1752 /* FINISHME: This should really set to the correct maximal writemask for each
1753 * FINISHME: component written (in the loops below). This case can only
1754 * FINISHME: occur for matrices, arrays, and structures.
1755 */
1756 if (ir->write_mask == 0) {
1757 assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
1758 l.writemask = WRITEMASK_XYZW;
1759 } else if (ir->lhs->type->is_scalar()) {
1760 /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
1761 * FINISHME: W component of fragment shader output zero, work correctly.
1762 */
1763 l.writemask = WRITEMASK_XYZW;
1764 } else {
1765 int swizzles[4];
1766 int first_enabled_chan = 0;
1767 int rhs_chan = 0;
1768
1769 assert(ir->lhs->type->is_vector());
1770 l.writemask = ir->write_mask;
1771
1772 for (int i = 0; i < 4; i++) {
1773 if (l.writemask & (1 << i)) {
1774 first_enabled_chan = GET_SWZ(r.swizzle, i);
1775 break;
1776 }
1777 }
1778
1779 /* Swizzle a small RHS vector into the channels being written.
1780 *
1781 * glsl ir treats write_mask as dictating how many channels are
1782 * present on the RHS while Mesa IR treats write_mask as just
1783 * showing which channels of the vec4 RHS get written.
1784 */
1785 for (int i = 0; i < 4; i++) {
1786 if (l.writemask & (1 << i))
1787 swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
1788 else
1789 swizzles[i] = first_enabled_chan;
1790 }
1791 r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
1792 swizzles[2], swizzles[3]);
1793 }
1794
1795 assert(l.file != PROGRAM_UNDEFINED);
1796 assert(r.file != PROGRAM_UNDEFINED);
1797
1798 if (ir->condition) {
1799 const bool switch_order = this->process_move_condition(ir->condition);
1800 st_src_reg condition = this->result;
1801
1802 for (i = 0; i < type_size(ir->lhs->type); i++) {
1803 if (switch_order) {
1804 emit(ir, OPCODE_CMP, l, condition, st_src_reg(l), r);
1805 } else {
1806 emit(ir, OPCODE_CMP, l, condition, r, st_src_reg(l));
1807 }
1808
1809 l.index++;
1810 r.index++;
1811 }
1812 } else {
1813 for (i = 0; i < type_size(ir->lhs->type); i++) {
1814 emit(ir, OPCODE_MOV, l, r);
1815 l.index++;
1816 r.index++;
1817 }
1818 }
1819 }
1820
1821
1822 void
1823 glsl_to_tgsi_visitor::visit(ir_constant *ir)
1824 {
1825 st_src_reg src;
1826 GLfloat stack_vals[4] = { 0 };
1827 GLfloat *values = stack_vals;
1828 unsigned int i;
1829
1830 /* Unfortunately, 4 floats is all we can get into
1831 * _mesa_add_unnamed_constant. So, make a temp to store an
1832 * aggregate constant and move each constant value into it. If we
1833 * get lucky, copy propagation will eliminate the extra moves.
1834 */
1835
1836 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1837 st_src_reg temp_base = get_temp(ir->type);
1838 st_dst_reg temp = st_dst_reg(temp_base);
1839
1840 foreach_iter(exec_list_iterator, iter, ir->components) {
1841 ir_constant *field_value = (ir_constant *)iter.get();
1842 int size = type_size(field_value->type);
1843
1844 assert(size > 0);
1845
1846 field_value->accept(this);
1847 src = this->result;
1848
1849 for (i = 0; i < (unsigned int)size; i++) {
1850 emit(ir, OPCODE_MOV, temp, src);
1851
1852 src.index++;
1853 temp.index++;
1854 }
1855 }
1856 this->result = temp_base;
1857 return;
1858 }
1859
1860 if (ir->type->is_array()) {
1861 st_src_reg temp_base = get_temp(ir->type);
1862 st_dst_reg temp = st_dst_reg(temp_base);
1863 int size = type_size(ir->type->fields.array);
1864
1865 assert(size > 0);
1866
1867 for (i = 0; i < ir->type->length; i++) {
1868 ir->array_elements[i]->accept(this);
1869 src = this->result;
1870 for (int j = 0; j < size; j++) {
1871 emit(ir, OPCODE_MOV, temp, src);
1872
1873 src.index++;
1874 temp.index++;
1875 }
1876 }
1877 this->result = temp_base;
1878 return;
1879 }
1880
1881 if (ir->type->is_matrix()) {
1882 st_src_reg mat = get_temp(ir->type);
1883 st_dst_reg mat_column = st_dst_reg(mat);
1884
1885 for (i = 0; i < ir->type->matrix_columns; i++) {
1886 assert(ir->type->base_type == GLSL_TYPE_FLOAT);
1887 values = &ir->value.f[i * ir->type->vector_elements];
1888
1889 src = st_src_reg(PROGRAM_CONSTANT, -1, NULL);
1890 src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
1891 values,
1892 ir->type->vector_elements,
1893 &src.swizzle);
1894 emit(ir, OPCODE_MOV, mat_column, src);
1895
1896 mat_column.index++;
1897 }
1898
1899 this->result = mat;
1900 return;
1901 }
1902
1903 src.file = PROGRAM_CONSTANT;
1904 switch (ir->type->base_type) {
1905 case GLSL_TYPE_FLOAT:
1906 values = &ir->value.f[0];
1907 break;
1908 case GLSL_TYPE_UINT:
1909 for (i = 0; i < ir->type->vector_elements; i++) {
1910 values[i] = ir->value.u[i];
1911 }
1912 break;
1913 case GLSL_TYPE_INT:
1914 for (i = 0; i < ir->type->vector_elements; i++) {
1915 values[i] = ir->value.i[i];
1916 }
1917 break;
1918 case GLSL_TYPE_BOOL:
1919 for (i = 0; i < ir->type->vector_elements; i++) {
1920 values[i] = ir->value.b[i];
1921 }
1922 break;
1923 default:
1924 assert(!"Non-float/uint/int/bool constant");
1925 }
1926
1927 this->result = st_src_reg(PROGRAM_CONSTANT, -1, ir->type);
1928 this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters,
1929 values,
1930 ir->type->vector_elements,
1931 &this->result.swizzle);
1932 }
1933
1934 function_entry *
1935 glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig)
1936 {
1937 function_entry *entry;
1938
1939 foreach_iter(exec_list_iterator, iter, this->function_signatures) {
1940 entry = (function_entry *)iter.get();
1941
1942 if (entry->sig == sig)
1943 return entry;
1944 }
1945
1946 entry = ralloc(mem_ctx, function_entry);
1947 entry->sig = sig;
1948 entry->sig_id = this->next_signature_id++;
1949 entry->bgn_inst = NULL;
1950
1951 /* Allocate storage for all the parameters. */
1952 foreach_iter(exec_list_iterator, iter, sig->parameters) {
1953 ir_variable *param = (ir_variable *)iter.get();
1954 variable_storage *storage;
1955
1956 storage = find_variable_storage(param);
1957 assert(!storage);
1958
1959 storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY,
1960 this->next_temp);
1961 this->variables.push_tail(storage);
1962
1963 this->next_temp += type_size(param->type);
1964 }
1965
1966 if (!sig->return_type->is_void()) {
1967 entry->return_reg = get_temp(sig->return_type);
1968 } else {
1969 entry->return_reg = undef_src;
1970 }
1971
1972 this->function_signatures.push_tail(entry);
1973 return entry;
1974 }
1975
1976 void
1977 glsl_to_tgsi_visitor::visit(ir_call *ir)
1978 {
1979 glsl_to_tgsi_instruction *call_inst;
1980 ir_function_signature *sig = ir->get_callee();
1981 function_entry *entry = get_function_signature(sig);
1982 int i;
1983
1984 /* Process in parameters. */
1985 exec_list_iterator sig_iter = sig->parameters.iterator();
1986 foreach_iter(exec_list_iterator, iter, *ir) {
1987 ir_rvalue *param_rval = (ir_rvalue *)iter.get();
1988 ir_variable *param = (ir_variable *)sig_iter.get();
1989
1990 if (param->mode == ir_var_in ||
1991 param->mode == ir_var_inout) {
1992 variable_storage *storage = find_variable_storage(param);
1993 assert(storage);
1994
1995 param_rval->accept(this);
1996 st_src_reg r = this->result;
1997
1998 st_dst_reg l;
1999 l.file = storage->file;
2000 l.index = storage->index;
2001 l.reladdr = NULL;
2002 l.writemask = WRITEMASK_XYZW;
2003 l.cond_mask = COND_TR;
2004
2005 for (i = 0; i < type_size(param->type); i++) {
2006 emit(ir, OPCODE_MOV, l, r);
2007 l.index++;
2008 r.index++;
2009 }
2010 }
2011
2012 sig_iter.next();
2013 }
2014 assert(!sig_iter.has_next());
2015
2016 /* Emit call instruction */
2017 call_inst = emit(ir, OPCODE_CAL);
2018 call_inst->function = entry;
2019
2020 /* Process out parameters. */
2021 sig_iter = sig->parameters.iterator();
2022 foreach_iter(exec_list_iterator, iter, *ir) {
2023 ir_rvalue *param_rval = (ir_rvalue *)iter.get();
2024 ir_variable *param = (ir_variable *)sig_iter.get();
2025
2026 if (param->mode == ir_var_out ||
2027 param->mode == ir_var_inout) {
2028 variable_storage *storage = find_variable_storage(param);
2029 assert(storage);
2030
2031 st_src_reg r;
2032 r.file = storage->file;
2033 r.index = storage->index;
2034 r.reladdr = NULL;
2035 r.swizzle = SWIZZLE_NOOP;
2036 r.negate = 0;
2037
2038 param_rval->accept(this);
2039 st_dst_reg l = st_dst_reg(this->result);
2040
2041 for (i = 0; i < type_size(param->type); i++) {
2042 emit(ir, OPCODE_MOV, l, r);
2043 l.index++;
2044 r.index++;
2045 }
2046 }
2047
2048 sig_iter.next();
2049 }
2050 assert(!sig_iter.has_next());
2051
2052 /* Process return value. */
2053 this->result = entry->return_reg;
2054 }
2055
2056 void
2057 glsl_to_tgsi_visitor::visit(ir_texture *ir)
2058 {
2059 st_src_reg result_src, coord, lod_info, projector, dx, dy;
2060 st_dst_reg result_dst, coord_dst;
2061 glsl_to_tgsi_instruction *inst = NULL;
2062 prog_opcode opcode = OPCODE_NOP;
2063
2064 ir->coordinate->accept(this);
2065
2066 /* Put our coords in a temp. We'll need to modify them for shadow,
2067 * projection, or LOD, so the only case we'd use it as is is if
2068 * we're doing plain old texturing. Mesa IR optimization should
2069 * handle cleaning up our mess in that case.
2070 */
2071 coord = get_temp(glsl_type::vec4_type);
2072 coord_dst = st_dst_reg(coord);
2073 emit(ir, OPCODE_MOV, coord_dst, this->result);
2074
2075 if (ir->projector) {
2076 ir->projector->accept(this);
2077 projector = this->result;
2078 }
2079
2080 /* Storage for our result. Ideally for an assignment we'd be using
2081 * the actual storage for the result here, instead.
2082 */
2083 result_src = get_temp(glsl_type::vec4_type);
2084 result_dst = st_dst_reg(result_src);
2085
2086 switch (ir->op) {
2087 case ir_tex:
2088 opcode = OPCODE_TEX;
2089 break;
2090 case ir_txb:
2091 opcode = OPCODE_TXB;
2092 ir->lod_info.bias->accept(this);
2093 lod_info = this->result;
2094 break;
2095 case ir_txl:
2096 opcode = OPCODE_TXL;
2097 ir->lod_info.lod->accept(this);
2098 lod_info = this->result;
2099 break;
2100 case ir_txd:
2101 opcode = OPCODE_TXD;
2102 ir->lod_info.grad.dPdx->accept(this);
2103 dx = this->result;
2104 ir->lod_info.grad.dPdy->accept(this);
2105 dy = this->result;
2106 break;
2107 case ir_txf: // TODO: use TGSI_OPCODE_TXF here
2108 assert(!"GLSL 1.30 features unsupported");
2109 break;
2110 }
2111
2112 if (ir->projector) {
2113 if (opcode == OPCODE_TEX) {
2114 /* Slot the projector in as the last component of the coord. */
2115 coord_dst.writemask = WRITEMASK_W;
2116 emit(ir, OPCODE_MOV, coord_dst, projector);
2117 coord_dst.writemask = WRITEMASK_XYZW;
2118 opcode = OPCODE_TXP;
2119 } else {
2120 st_src_reg coord_w = coord;
2121 coord_w.swizzle = SWIZZLE_WWWW;
2122
2123 /* For the other TEX opcodes there's no projective version
2124 * since the last slot is taken up by lod info. Do the
2125 * projective divide now.
2126 */
2127 coord_dst.writemask = WRITEMASK_W;
2128 emit(ir, OPCODE_RCP, coord_dst, projector);
2129
2130 /* In the case where we have to project the coordinates "by hand,"
2131 * the shadow comparitor value must also be projected.
2132 */
2133 st_src_reg tmp_src = coord;
2134 if (ir->shadow_comparitor) {
2135 /* Slot the shadow value in as the second to last component of the
2136 * coord.
2137 */
2138 ir->shadow_comparitor->accept(this);
2139
2140 tmp_src = get_temp(glsl_type::vec4_type);
2141 st_dst_reg tmp_dst = st_dst_reg(tmp_src);
2142
2143 tmp_dst.writemask = WRITEMASK_Z;
2144 emit(ir, OPCODE_MOV, tmp_dst, this->result);
2145
2146 tmp_dst.writemask = WRITEMASK_XY;
2147 emit(ir, OPCODE_MOV, tmp_dst, coord);
2148 }
2149
2150 coord_dst.writemask = WRITEMASK_XYZ;
2151 emit(ir, OPCODE_MUL, coord_dst, tmp_src, coord_w);
2152
2153 coord_dst.writemask = WRITEMASK_XYZW;
2154 coord.swizzle = SWIZZLE_XYZW;
2155 }
2156 }
2157
2158 /* If projection is done and the opcode is not OPCODE_TXP, then the shadow
2159 * comparitor was put in the correct place (and projected) by the code,
2160 * above, that handles by-hand projection.
2161 */
2162 if (ir->shadow_comparitor && (!ir->projector || opcode == OPCODE_TXP)) {
2163 /* Slot the shadow value in as the second to last component of the
2164 * coord.
2165 */
2166 ir->shadow_comparitor->accept(this);
2167 coord_dst.writemask = WRITEMASK_Z;
2168 emit(ir, OPCODE_MOV, coord_dst, this->result);
2169 coord_dst.writemask = WRITEMASK_XYZW;
2170 }
2171
2172 if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) {
2173 /* Mesa IR stores lod or lod bias in the last channel of the coords. */
2174 coord_dst.writemask = WRITEMASK_W;
2175 emit(ir, OPCODE_MOV, coord_dst, lod_info);
2176 coord_dst.writemask = WRITEMASK_XYZW;
2177 }
2178
2179 if (opcode == OPCODE_TXD)
2180 inst = emit(ir, opcode, result_dst, coord, dx, dy);
2181 else
2182 inst = emit(ir, opcode, result_dst, coord);
2183
2184 if (ir->shadow_comparitor)
2185 inst->tex_shadow = GL_TRUE;
2186
2187 inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler,
2188 this->shader_program,
2189 this->prog);
2190
2191 const glsl_type *sampler_type = ir->sampler->type;
2192
2193 switch (sampler_type->sampler_dimensionality) {
2194 case GLSL_SAMPLER_DIM_1D:
2195 inst->tex_target = (sampler_type->sampler_array)
2196 ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
2197 break;
2198 case GLSL_SAMPLER_DIM_2D:
2199 inst->tex_target = (sampler_type->sampler_array)
2200 ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
2201 break;
2202 case GLSL_SAMPLER_DIM_3D:
2203 inst->tex_target = TEXTURE_3D_INDEX;
2204 break;
2205 case GLSL_SAMPLER_DIM_CUBE:
2206 inst->tex_target = TEXTURE_CUBE_INDEX;
2207 break;
2208 case GLSL_SAMPLER_DIM_RECT:
2209 inst->tex_target = TEXTURE_RECT_INDEX;
2210 break;
2211 case GLSL_SAMPLER_DIM_BUF:
2212 assert(!"FINISHME: Implement ARB_texture_buffer_object");
2213 break;
2214 default:
2215 assert(!"Should not get here.");
2216 }
2217
2218 this->result = result_src;
2219 }
2220
2221 void
2222 glsl_to_tgsi_visitor::visit(ir_return *ir)
2223 {
2224 if (ir->get_value()) {
2225 st_dst_reg l;
2226 int i;
2227
2228 assert(current_function);
2229
2230 ir->get_value()->accept(this);
2231 st_src_reg r = this->result;
2232
2233 l = st_dst_reg(current_function->return_reg);
2234
2235 for (i = 0; i < type_size(current_function->sig->return_type); i++) {
2236 emit(ir, OPCODE_MOV, l, r);
2237 l.index++;
2238 r.index++;
2239 }
2240 }
2241
2242 emit(ir, OPCODE_RET);
2243 }
2244
2245 void
2246 glsl_to_tgsi_visitor::visit(ir_discard *ir)
2247 {
2248 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
2249
2250 if (ir->condition) {
2251 ir->condition->accept(this);
2252 this->result.negate = ~this->result.negate;
2253 emit(ir, OPCODE_KIL, undef_dst, this->result);
2254 } else {
2255 emit(ir, OPCODE_KIL_NV);
2256 }
2257
2258 fp->UsesKill = GL_TRUE;
2259 }
2260
2261 void
2262 glsl_to_tgsi_visitor::visit(ir_if *ir)
2263 {
2264 glsl_to_tgsi_instruction *cond_inst, *if_inst, *else_inst = NULL;
2265 glsl_to_tgsi_instruction *prev_inst;
2266
2267 prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
2268
2269 ir->condition->accept(this);
2270 assert(this->result.file != PROGRAM_UNDEFINED);
2271
2272 if (this->options->EmitCondCodes) {
2273 cond_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
2274
2275 /* See if we actually generated any instruction for generating
2276 * the condition. If not, then cook up a move to a temp so we
2277 * have something to set cond_update on.
2278 */
2279 if (cond_inst == prev_inst) {
2280 st_src_reg temp = get_temp(glsl_type::bool_type);
2281 cond_inst = emit(ir->condition, OPCODE_MOV, st_dst_reg(temp), result);
2282 }
2283 cond_inst->cond_update = GL_TRUE;
2284
2285 if_inst = emit(ir->condition, OPCODE_IF);
2286 if_inst->dst.cond_mask = COND_NE;
2287 } else {
2288 if_inst = emit(ir->condition, OPCODE_IF, undef_dst, this->result);
2289 }
2290
2291 this->instructions.push_tail(if_inst);
2292
2293 visit_exec_list(&ir->then_instructions, this);
2294
2295 if (!ir->else_instructions.is_empty()) {
2296 else_inst = emit(ir->condition, OPCODE_ELSE);
2297 visit_exec_list(&ir->else_instructions, this);
2298 }
2299
2300 if_inst = emit(ir->condition, OPCODE_ENDIF);
2301 }
2302
2303 glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
2304 {
2305 result.file = PROGRAM_UNDEFINED;
2306 next_temp = 1;
2307 next_signature_id = 1;
2308 current_function = NULL;
2309 num_address_regs = 0;
2310 indirect_addr_temps = false;
2311 indirect_addr_consts = false;
2312 mem_ctx = ralloc_context(NULL);
2313 }
2314
2315 glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
2316 {
2317 ralloc_free(mem_ctx);
2318 }
2319
2320 extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v)
2321 {
2322 delete v;
2323 }
2324
2325
2326 /**
2327 * Count resources used by the given gpu program (number of texture
2328 * samplers, etc).
2329 */
2330 static void
2331 count_resources(glsl_to_tgsi_visitor *v)
2332 {
2333 v->samplers_used = 0;
2334
2335 foreach_iter(exec_list_iterator, iter, v->instructions) {
2336 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
2337
2338 if (_mesa_is_tex_instruction(inst->op)) {
2339 v->samplers_used |= 1 << inst->sampler;
2340 }
2341 }
2342 }
2343
2344
2345 /**
2346 * Check if the given vertex/fragment/shader program is within the
2347 * resource limits of the context (number of texture units, etc).
2348 * If any of those checks fail, record a linker error.
2349 *
2350 * XXX more checks are needed...
2351 */
2352 static void
2353 check_resources(const struct gl_context *ctx,
2354 struct gl_shader_program *shader_program,
2355 glsl_to_tgsi_visitor *prog,
2356 struct gl_program *proginfo)
2357 {
2358 switch (proginfo->Target) {
2359 case GL_VERTEX_PROGRAM_ARB:
2360 if (_mesa_bitcount(prog->samplers_used) >
2361 ctx->Const.MaxVertexTextureImageUnits) {
2362 fail_link(shader_program, "Too many vertex shader texture samplers");
2363 }
2364 if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) {
2365 fail_link(shader_program, "Too many vertex shader constants");
2366 }
2367 break;
2368 case MESA_GEOMETRY_PROGRAM:
2369 if (_mesa_bitcount(prog->samplers_used) >
2370 ctx->Const.MaxGeometryTextureImageUnits) {
2371 fail_link(shader_program, "Too many geometry shader texture samplers");
2372 }
2373 if (proginfo->Parameters->NumParameters >
2374 MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) {
2375 fail_link(shader_program, "Too many geometry shader constants");
2376 }
2377 break;
2378 case GL_FRAGMENT_PROGRAM_ARB:
2379 if (_mesa_bitcount(prog->samplers_used) >
2380 ctx->Const.MaxTextureImageUnits) {
2381 fail_link(shader_program, "Too many fragment shader texture samplers");
2382 }
2383 if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) {
2384 fail_link(shader_program, "Too many fragment shader constants");
2385 }
2386 break;
2387 default:
2388 _mesa_problem(ctx, "unexpected program type in check_resources()");
2389 }
2390 }
2391
2392
2393
/**
 * Sort record pairing a uniform with the parameter-list position it was
 * assigned for the target being processed.
 */
struct uniform_sort {
   struct gl_uniform *u;   /**< the uniform itself */
   int pos;                /**< position used as the sort key */
};

/* The shader_program->Uniforms list is almost sorted in increasing
 * uniform->{Frag,Vert}Pos locations, but not quite when there are
 * uniforms shared between targets.  We need to add parameters in
 * increasing order for the targets.
 *
 * qsort() comparator ordering uniform_sort records by ascending pos.
 * Returns a negative, zero or positive value.  The result is computed with
 * comparisons rather than a subtraction so that it cannot overflow, and the
 * records are accessed through const pointers since they are not modified.
 */
static int
sort_uniforms(const void *a, const void *b)
{
   const struct uniform_sort *u1 = (const struct uniform_sort *)a;
   const struct uniform_sort *u2 = (const struct uniform_sort *)b;

   return (u1->pos > u2->pos) - (u1->pos < u2->pos);
}
2412
2413 /* Add the uniforms to the parameters. The linker chose locations
2414 * in our parameters lists (which weren't created yet), which the
2415 * uniforms code will use to poke values into our parameters list
2416 * when uniforms are updated.
2417 */
2418 static void
2419 add_uniforms_to_parameters_list(struct gl_shader_program *shader_program,
2420 struct gl_shader *shader,
2421 struct gl_program *prog)
2422 {
2423 unsigned int i;
2424 unsigned int next_sampler = 0, num_uniforms = 0;
2425 struct uniform_sort *sorted_uniforms;
2426
2427 sorted_uniforms = ralloc_array(NULL, struct uniform_sort,
2428 shader_program->Uniforms->NumUniforms);
2429
2430 for (i = 0; i < shader_program->Uniforms->NumUniforms; i++) {
2431 struct gl_uniform *uniform = shader_program->Uniforms->Uniforms + i;
2432 int parameter_index = -1;
2433
2434 switch (shader->Type) {
2435 case GL_VERTEX_SHADER:
2436 parameter_index = uniform->VertPos;
2437 break;
2438 case GL_FRAGMENT_SHADER:
2439 parameter_index = uniform->FragPos;
2440 break;
2441 case GL_GEOMETRY_SHADER:
2442 parameter_index = uniform->GeomPos;
2443 break;
2444 }
2445
2446 /* Only add uniforms used in our target. */
2447 if (parameter_index != -1) {
2448 sorted_uniforms[num_uniforms].pos = parameter_index;
2449 sorted_uniforms[num_uniforms].u = uniform;
2450 num_uniforms++;
2451 }
2452 }
2453
2454 qsort(sorted_uniforms, num_uniforms, sizeof(struct uniform_sort),
2455 sort_uniforms);
2456
2457 for (i = 0; i < num_uniforms; i++) {
2458 struct gl_uniform *uniform = sorted_uniforms[i].u;
2459 int parameter_index = sorted_uniforms[i].pos;
2460 const glsl_type *type = uniform->Type;
2461 unsigned int size;
2462
2463 if (type->is_vector() ||
2464 type->is_scalar()) {
2465 size = type->vector_elements;
2466 } else {
2467 size = type_size(type) * 4;
2468 }
2469
2470 gl_register_file file;
2471 if (type->is_sampler() ||
2472 (type->is_array() && type->fields.array->is_sampler())) {
2473 file = PROGRAM_SAMPLER;
2474 } else {
2475 file = PROGRAM_UNIFORM;
2476 }
2477
2478 GLint index = _mesa_lookup_parameter_index(prog->Parameters, -1,
2479 uniform->Name);
2480
2481 if (index < 0) {
2482 index = _mesa_add_parameter(prog->Parameters, file,
2483 uniform->Name, size, type->gl_type,
2484 NULL, NULL, 0x0);
2485
2486 /* Sampler uniform values are stored in prog->SamplerUnits,
2487 * and the entry in that array is selected by this index we
2488 * store in ParameterValues[].
2489 */
2490 if (file == PROGRAM_SAMPLER) {
2491 for (unsigned int j = 0; j < size / 4; j++)
2492 prog->Parameters->ParameterValues[index + j][0] = next_sampler++;
2493 }
2494
2495 /* The location chosen in the Parameters list here (returned
2496 * from _mesa_add_uniform) has to match what the linker chose.
2497 */
2498 if (index != parameter_index) {
2499 fail_link(shader_program, "Allocation of uniform `%s' to target "
2500 "failed (%d vs %d)\n",
2501 uniform->Name, index, parameter_index);
2502 }
2503 }
2504 }
2505
2506 ralloc_free(sorted_uniforms);
2507 }
2508
2509 static void
2510 set_uniform_initializer(struct gl_context *ctx, void *mem_ctx,
2511 struct gl_shader_program *shader_program,
2512 const char *name, const glsl_type *type,
2513 ir_constant *val)
2514 {
2515 if (type->is_record()) {
2516 ir_constant *field_constant;
2517
2518 field_constant = (ir_constant *)val->components.get_head();
2519
2520 for (unsigned int i = 0; i < type->length; i++) {
2521 const glsl_type *field_type = type->fields.structure[i].type;
2522 const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name,
2523 type->fields.structure[i].name);
2524 set_uniform_initializer(ctx, mem_ctx, shader_program, field_name,
2525 field_type, field_constant);
2526 field_constant = (ir_constant *)field_constant->next;
2527 }
2528 return;
2529 }
2530
2531 int loc = _mesa_get_uniform_location(ctx, shader_program, name);
2532
2533 if (loc == -1) {
2534 fail_link(shader_program,
2535 "Couldn't find uniform for initializer %s\n", name);
2536 return;
2537 }
2538
2539 for (unsigned int i = 0; i < (type->is_array() ? type->length : 1); i++) {
2540 ir_constant *element;
2541 const glsl_type *element_type;
2542 if (type->is_array()) {
2543 element = val->array_elements[i];
2544 element_type = type->fields.array;
2545 } else {
2546 element = val;
2547 element_type = type;
2548 }
2549
2550 void *values;
2551
2552 if (element_type->base_type == GLSL_TYPE_BOOL) {
2553 int *conv = ralloc_array(mem_ctx, int, element_type->components());
2554 for (unsigned int j = 0; j < element_type->components(); j++) {
2555 conv[j] = element->value.b[j];
2556 }
2557 values = (void *)conv;
2558 element_type = glsl_type::get_instance(GLSL_TYPE_INT,
2559 element_type->vector_elements,
2560 1);
2561 } else {
2562 values = &element->value;
2563 }
2564
2565 if (element_type->is_matrix()) {
2566 _mesa_uniform_matrix(ctx, shader_program,
2567 element_type->matrix_columns,
2568 element_type->vector_elements,
2569 loc, 1, GL_FALSE, (GLfloat *)values);
2570 loc += element_type->matrix_columns;
2571 } else {
2572 _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns,
2573 values, element_type->gl_type);
2574 loc += type_size(element_type);
2575 }
2576 }
2577 }
2578
2579 static void
2580 set_uniform_initializers(struct gl_context *ctx,
2581 struct gl_shader_program *shader_program)
2582 {
2583 void *mem_ctx = NULL;
2584
2585 for (unsigned int i = 0; i < MESA_SHADER_TYPES; i++) {
2586 struct gl_shader *shader = shader_program->_LinkedShaders[i];
2587
2588 if (shader == NULL)
2589 continue;
2590
2591 foreach_iter(exec_list_iterator, iter, *shader->ir) {
2592 ir_instruction *ir = (ir_instruction *)iter.get();
2593 ir_variable *var = ir->as_variable();
2594
2595 if (!var || var->mode != ir_var_uniform || !var->constant_value)
2596 continue;
2597
2598 if (!mem_ctx)
2599 mem_ctx = ralloc_context(NULL);
2600
2601 set_uniform_initializer(ctx, mem_ctx, shader_program, var->name,
2602 var->type, var->constant_value);
2603 }
2604 }
2605
2606 ralloc_free(mem_ctx);
2607 }
2608
2609 /*
2610 * Scan/rewrite program to remove reads of custom (output) registers.
2611 * The passed type has to be either PROGRAM_OUTPUT or PROGRAM_VARYING
2612 * (for vertex shaders).
2613 * In GLSL shaders, varying vars can be read and written.
2614 * On some hardware, trying to read an output register causes trouble.
2615 * So, rewrite the program to use a temporary register in this case.
2616 *
2617 * Based on _mesa_remove_output_reads from programopt.c.
2618 */
2619 void
2620 glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
2621 {
2622 GLuint i;
2623 GLint outputMap[VERT_RESULT_MAX];
2624 GLuint numVaryingReads = 0;
2625 GLboolean usedTemps[MAX_PROGRAM_TEMPS];
2626 GLuint firstTemp = 0;
2627
2628 _mesa_find_used_registers(prog, PROGRAM_TEMPORARY,
2629 usedTemps, MAX_PROGRAM_TEMPS);
2630
2631 assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT);
2632 assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING);
2633
2634 for (i = 0; i < VERT_RESULT_MAX; i++)
2635 outputMap[i] = -1;
2636
2637 /* look for instructions which read from varying vars */
2638 foreach_iter(exec_list_iterator, iter, this->instructions) {
2639 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
2640 const GLuint numSrc = _mesa_num_inst_src_regs(inst->op);
2641 GLuint j;
2642 for (j = 0; j < numSrc; j++) {
2643 if (inst->src[j].file == type) {
2644 /* replace the read with a temp reg */
2645 const GLuint var = inst->src[j].index;
2646 if (outputMap[var] == -1) {
2647 numVaryingReads++;
2648 outputMap[var] = _mesa_find_free_register(usedTemps,
2649 MAX_PROGRAM_TEMPS,
2650 firstTemp);
2651 firstTemp = outputMap[var] + 1;
2652 }
2653 inst->src[j].file = PROGRAM_TEMPORARY;
2654 inst->src[j].index = outputMap[var];
2655 }
2656 }
2657 }
2658
2659 if (numVaryingReads == 0)
2660 return; /* nothing to be done */
2661
2662 /* look for instructions which write to the varying vars identified above */
2663 foreach_iter(exec_list_iterator, iter, this->instructions) {
2664 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
2665 if (inst->dst.file == type && outputMap[inst->dst.index] >= 0) {
2666 /* change inst to write to the temp reg, instead of the varying */
2667 inst->dst.file = PROGRAM_TEMPORARY;
2668 inst->dst.index = outputMap[inst->dst.index];
2669 }
2670 }
2671
2672 /* insert new MOV instructions at the end */
2673 for (i = 0; i < VERT_RESULT_MAX; i++) {
2674 if (outputMap[i] >= 0) {
2675 /* MOV VAR[i], TEMP[tmp]; */
2676 st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i]);
2677 st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW);
2678 dst.index = i;
2679 this->emit(NULL, OPCODE_MOV, dst, src);
2680 }
2681 }
2682 }
2683
2684 /* Replaces all references to a temporary register index with another index. */
2685 void
2686 glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
2687 {
2688 foreach_iter(exec_list_iterator, iter, this->instructions) {
2689 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
2690 unsigned j;
2691
2692 for (j=0; j < _mesa_num_inst_src_regs(inst->op); j++) {
2693 if (inst->src[j].file == PROGRAM_TEMPORARY &&
2694 inst->src[j].index == index) {
2695 inst->src[j].index = new_index;
2696 }
2697 }
2698
2699 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
2700 inst->dst.index = new_index;
2701 }
2702 }
2703 }
2704
2705 int
2706 glsl_to_tgsi_visitor::get_first_temp_read(int index)
2707 {
2708 int depth = 0; /* loop depth */
2709 int loop_start = -1; /* index of the first active BGNLOOP (if any) */
2710 unsigned i = 0, j;
2711
2712 foreach_iter(exec_list_iterator, iter, this->instructions) {
2713 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
2714
2715 for (j=0; j < _mesa_num_inst_src_regs(inst->op); j++) {
2716 if (inst->src[j].file == PROGRAM_TEMPORARY &&
2717 inst->src[j].index == index) {
2718 return (depth == 0) ? i : loop_start;
2719 }
2720 }
2721
2722 if (inst->op == OPCODE_BGNLOOP) {
2723 if(depth++ == 0)
2724 loop_start = i;
2725 } else if (inst->op == OPCODE_ENDLOOP) {
2726 if (--depth == 0)
2727 loop_start = -1;
2728 }
2729 assert(depth >= 0);
2730
2731 i++;
2732 }
2733
2734 return -1;
2735 }
2736
2737 int
2738 glsl_to_tgsi_visitor::get_first_temp_write(int index)
2739 {
2740 int depth = 0; /* loop depth */
2741 int loop_start = -1; /* index of the first active BGNLOOP (if any) */
2742 int i = 0;
2743
2744 foreach_iter(exec_list_iterator, iter, this->instructions) {
2745 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
2746
2747 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
2748 return (depth == 0) ? i : loop_start;
2749 }
2750
2751 if (inst->op == OPCODE_BGNLOOP) {
2752 if(depth++ == 0)
2753 loop_start = i;
2754 } else if (inst->op == OPCODE_ENDLOOP) {
2755 if (--depth == 0)
2756 loop_start = -1;
2757 }
2758 assert(depth >= 0);
2759
2760 i++;
2761 }
2762
2763 return -1;
2764 }
2765
2766 int
2767 glsl_to_tgsi_visitor::get_last_temp_read(int index)
2768 {
2769 int depth = 0; /* loop depth */
2770 int last = -1; /* index of last instruction that reads the temporary */
2771 unsigned i = 0, j;
2772
2773 foreach_iter(exec_list_iterator, iter, this->instructions) {
2774 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
2775
2776 for (j=0; j < _mesa_num_inst_src_regs(inst->op); j++) {
2777 if (inst->src[j].file == PROGRAM_TEMPORARY &&
2778 inst->src[j].index == index) {
2779 last = (depth == 0) ? i : -2;
2780 }
2781 }
2782
2783 if (inst->op == OPCODE_BGNLOOP)
2784 depth++;
2785 else if (inst->op == OPCODE_ENDLOOP)
2786 if (--depth == 0 && last == -2)
2787 last = i;
2788 assert(depth >= 0);
2789
2790 i++;
2791 }
2792
2793 assert(last >= -1);
2794 return last;
2795 }
2796
2797 int
2798 glsl_to_tgsi_visitor::get_last_temp_write(int index)
2799 {
2800 int depth = 0; /* loop depth */
2801 int last = -1; /* index of last instruction that writes to the temporary */
2802 int i = 0;
2803
2804 foreach_iter(exec_list_iterator, iter, this->instructions) {
2805 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
2806
2807 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index)
2808 last = (depth == 0) ? i : -2;
2809
2810 if (inst->op == OPCODE_BGNLOOP)
2811 depth++;
2812 else if (inst->op == OPCODE_ENDLOOP)
2813 if (--depth == 0 && last == -2)
2814 last = i;
2815 assert(depth >= 0);
2816
2817 i++;
2818 }
2819
2820 assert(last >= -1);
2821 return last;
2822 }
2823
2824 /*
2825 * On a basic block basis, tracks available PROGRAM_TEMPORARY register
2826 * channels for copy propagation and updates following instructions to
2827 * use the original versions.
2828 *
2829 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
2830 * will occur. As an example, a TXP production before this pass:
2831 *
2832 * 0: MOV TEMP[1], INPUT[4].xyyy;
2833 * 1: MOV TEMP[1].w, INPUT[4].wwww;
2834 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
2835 *
2836 * and after:
2837 *
2838 * 0: MOV TEMP[1], INPUT[4].xyyy;
2839 * 1: MOV TEMP[1].w, INPUT[4].wwww;
2840 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
2841 *
2842 * which allows for dead code elimination on TEMP[1]'s writes.
2843 */
2844 void
2845 glsl_to_tgsi_visitor::copy_propagate(void)
2846 {
2847 glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
2848 glsl_to_tgsi_instruction *,
2849 this->next_temp * 4);
2850 int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
2851 int level = 0;
2852
2853 foreach_iter(exec_list_iterator, iter, this->instructions) {
2854 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
2855
2856 assert(inst->dst.file != PROGRAM_TEMPORARY
2857 || inst->dst.index < this->next_temp);
2858
2859 /* First, do any copy propagation possible into the src regs. */
2860 for (int r = 0; r < 3; r++) {
2861 glsl_to_tgsi_instruction *first = NULL;
2862 bool good = true;
2863 int acp_base = inst->src[r].index * 4;
2864
2865 if (inst->src[r].file != PROGRAM_TEMPORARY ||
2866 inst->src[r].reladdr)
2867 continue;
2868
2869 /* See if we can find entries in the ACP consisting of MOVs
2870 * from the same src register for all the swizzled channels
2871 * of this src register reference.
2872 */
2873 for (int i = 0; i < 4; i++) {
2874 int src_chan = GET_SWZ(inst->src[r].swizzle, i);
2875 glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];
2876
2877 if (!copy_chan) {
2878 good = false;
2879 break;
2880 }
2881
2882 assert(acp_level[acp_base + src_chan] <= level);
2883
2884 if (!first) {
2885 first = copy_chan;
2886 } else {
2887 if (first->src[0].file != copy_chan->src[0].file ||
2888 first->src[0].index != copy_chan->src[0].index) {
2889 good = false;
2890 break;
2891 }
2892 }
2893 }
2894
2895 if (good) {
2896 /* We've now validated that we can copy-propagate to
2897 * replace this src register reference. Do it.
2898 */
2899 inst->src[r].file = first->src[0].file;
2900 inst->src[r].index = first->src[0].index;
2901
2902 int swizzle = 0;
2903 for (int i = 0; i < 4; i++) {
2904 int src_chan = GET_SWZ(inst->src[r].swizzle, i);
2905 glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan];
2906 swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
2907 (3 * i));
2908 }
2909 inst->src[r].swizzle = swizzle;
2910 }
2911 }
2912
2913 switch (inst->op) {
2914 case OPCODE_BGNLOOP:
2915 case OPCODE_ENDLOOP:
2916 /* End of a basic block, clear the ACP entirely. */
2917 memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
2918 break;
2919
2920 case OPCODE_IF:
2921 ++level;
2922 break;
2923
2924 case OPCODE_ENDIF:
2925 case OPCODE_ELSE:
2926 /* Clear all channels written inside the block from the ACP, but
2927 * leaving those that were not touched.
2928 */
2929 for (int r = 0; r < this->next_temp; r++) {
2930 for (int c = 0; c < 4; c++) {
2931 if (!acp[4 * r + c])
2932 continue;
2933
2934 if (acp_level[4 * r + c] >= level)
2935 acp[4 * r + c] = NULL;
2936 }
2937 }
2938 if (inst->op == OPCODE_ENDIF)
2939 --level;
2940 break;
2941
2942 default:
2943 /* Continuing the block, clear any written channels from
2944 * the ACP.
2945 */
2946 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
2947 /* Any temporary might be written, so no copy propagation
2948 * across this instruction.
2949 */
2950 memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
2951 } else if (inst->dst.file == PROGRAM_OUTPUT &&
2952 inst->dst.reladdr) {
2953 /* Any output might be written, so no copy propagation
2954 * from outputs across this instruction.
2955 */
2956 for (int r = 0; r < this->next_temp; r++) {
2957 for (int c = 0; c < 4; c++) {
2958 if (!acp[4 * r + c])
2959 continue;
2960
2961 if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
2962 acp[4 * r + c] = NULL;
2963 }
2964 }
2965 } else if (inst->dst.file == PROGRAM_TEMPORARY ||
2966 inst->dst.file == PROGRAM_OUTPUT) {
2967 /* Clear where it's used as dst. */
2968 if (inst->dst.file == PROGRAM_TEMPORARY) {
2969 for (int c = 0; c < 4; c++) {
2970 if (inst->dst.writemask & (1 << c)) {
2971 acp[4 * inst->dst.index + c] = NULL;
2972 }
2973 }
2974 }
2975
2976 /* Clear where it's used as src. */
2977 for (int r = 0; r < this->next_temp; r++) {
2978 for (int c = 0; c < 4; c++) {
2979 if (!acp[4 * r + c])
2980 continue;
2981
2982 int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);
2983
2984 if (acp[4 * r + c]->src[0].file == inst->dst.file &&
2985 acp[4 * r + c]->src[0].index == inst->dst.index &&
2986 inst->dst.writemask & (1 << src_chan))
2987 {
2988 acp[4 * r + c] = NULL;
2989 }
2990 }
2991 }
2992 }
2993 break;
2994 }
2995
2996 /* If this is a copy, add it to the ACP. */
2997 if (inst->op == OPCODE_MOV &&
2998 inst->dst.file == PROGRAM_TEMPORARY &&
2999 !inst->dst.reladdr &&
3000 !inst->saturate &&
3001 !inst->src[0].reladdr &&
3002 !inst->src[0].negate) {
3003 for (int i = 0; i < 4; i++) {
3004 if (inst->dst.writemask & (1 << i)) {
3005 acp[4 * inst->dst.index + i] = inst;
3006 acp_level[4 * inst->dst.index + i] = level;
3007 }
3008 }
3009 }
3010 }
3011
3012 ralloc_free(acp_level);
3013 ralloc_free(acp);
3014 }
3015
3016 /*
3017 * Tracks available PROGRAM_TEMPORARY registers for dead code elimination.
3018 *
3019 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
3020 * will occur. As an example, a TXP production after copy propagation but
3021 * before this pass:
3022 *
3023 * 0: MOV TEMP[1], INPUT[4].xyyy;
3024 * 1: MOV TEMP[1].w, INPUT[4].wwww;
3025 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
3026 *
3027 * and after this pass:
3028 *
3029 * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
3030 *
3031 * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB)
3032 * FIXME: doesn't eliminate all dead code inside of loops; it steps around them
3033 */
3034 void
3035 glsl_to_tgsi_visitor::eliminate_dead_code(void)
3036 {
3037 int i;
3038
3039 for (i=0; i < this->next_temp; i++) {
3040 int last_read = get_last_temp_read(i);
3041 int j = 0;
3042
3043 foreach_iter(exec_list_iterator, iter, this->instructions) {
3044 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3045
3046 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i &&
3047 j > last_read)
3048 {
3049 iter.remove();
3050 delete inst;
3051 }
3052
3053 j++;
3054 }
3055 }
3056 }
3057
3058 /* Merges temporary registers together where possible to reduce the number of
3059 * registers needed to run a program.
3060 *
3061 * Produces optimal code only after copy propagation and dead code elimination
3062 * have been run. */
3063 void
3064 glsl_to_tgsi_visitor::merge_registers(void)
3065 {
3066 int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp);
3067 int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp);
3068 int i, j;
3069
3070 /* Read the indices of the last read and first write to each temp register
3071 * into an array so that we don't have to traverse the instruction list as
3072 * much. */
3073 for (i=0; i < this->next_temp; i++) {
3074 last_reads[i] = get_last_temp_read(i);
3075 first_writes[i] = get_first_temp_write(i);
3076 }
3077
3078 /* Start looking for registers with non-overlapping usages that can be
3079 * merged together. */
3080 for (i=0; i < this->next_temp - 1; i++) {
3081 /* Don't touch unused registers. */
3082 if (last_reads[i] < 0 || first_writes[i] < 0) continue;
3083
3084 for (j=i+1; j < this->next_temp; j++) {
3085 /* Don't touch unused registers. */
3086 if (last_reads[j] < 0 || first_writes[j] < 0) continue;
3087
3088 /* We can merge the two registers if the first write to j is after or
3089 * in the same instruction as the last read from i. Note that the
3090 * register at index i will always be used earlier or at the same time
3091 * as the register at index j. */
3092 assert(first_writes[i] <= first_writes[j]);
3093 if (last_reads[i] <= first_writes[j]) {
3094 rename_temp_register(j, i); /* Replace all references to j with i.*/
3095
3096 /* Update the first_writes and last_reads arrays with the new
3097 * values for the merged register index, and mark the newly unused
3098 * register index as such. */
3099 last_reads[i] = last_reads[j];
3100 first_writes[j] = -1;
3101 last_reads[j] = -1;
3102 }
3103 }
3104 }
3105
3106 ralloc_free(last_reads);
3107 ralloc_free(first_writes);
3108 }
3109
3110 /* Reassign indices to temporary registers by reusing unused indices created
3111 * by optimization passes. */
3112 void
3113 glsl_to_tgsi_visitor::renumber_registers(void)
3114 {
3115 int i = 0;
3116 int new_index = 0;
3117
3118 for (i=0; i < this->next_temp; i++) {
3119 if (get_first_temp_read(i) < 0) continue;
3120 if (i != new_index)
3121 rename_temp_register(i, new_index);
3122 new_index++;
3123 }
3124
3125 this->next_temp = new_index;
3126 }
3127
/* ------------------------- TGSI conversion stuff -------------------------- */

/**
 * One recorded branch: the branch target noted by get_label() and the token
 * slot handed back to the caller for later patching.
 */
struct label {
   unsigned branch_target;   /**< target passed to get_label() */
   unsigned token;           /**< slot whose address get_label() returns */
};
3133
3134 /**
3135 * Intermediate state used during shader translation.
3136 */
3137 struct st_translate {
3138 struct ureg_program *ureg;
3139
3140 struct ureg_dst temps[MAX_PROGRAM_TEMPS];
3141 struct ureg_src *constants;
3142 struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
3143 struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
3144 struct ureg_dst address[1];
3145 struct ureg_src samplers[PIPE_MAX_SAMPLERS];
3146 struct ureg_src systemValues[SYSTEM_VALUE_MAX];
3147
3148 /* Extra info for handling point size clamping in vertex shader */
3149 struct ureg_dst pointSizeResult; /**< Actual point size output register */
3150 struct ureg_src pointSizeConst; /**< Point size range constant register */
3151 GLint pointSizeOutIndex; /**< Temp point size output register */
3152 GLboolean prevInstWrotePointSize;
3153
3154 const GLuint *inputMapping;
3155 const GLuint *outputMapping;
3156
3157 /* For every instruction that contains a label (eg CALL), keep
3158 * details so that we can go back afterwards and emit the correct
3159 * tgsi instruction number for each label.
3160 */
3161 struct label *labels;
3162 unsigned labels_size;
3163 unsigned labels_count;
3164
3165 /* Keep a record of the tgsi instruction number that each mesa
3166 * instruction starts at, will be used to fix up labels after
3167 * translation.
3168 */
3169 unsigned *insn;
3170 unsigned insn_size;
3171 unsigned insn_count;
3172
3173 unsigned procType; /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */
3174
3175 boolean error;
3176 };
3177
3178 /** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */
3179 static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
3180 TGSI_SEMANTIC_FACE,
3181 TGSI_SEMANTIC_INSTANCEID
3182 };
3183
3184 /**
3185 * Make note of a branch to a label in the TGSI code.
3186 * After we've emitted all instructions, we'll go over the list
3187 * of labels built here and patch the TGSI code with the actual
3188 * location of each label.
3189 */
3190 static unsigned *get_label( struct st_translate *t,
3191 unsigned branch_target )
3192 {
3193 unsigned i;
3194
3195 if (t->labels_count + 1 >= t->labels_size) {
3196 t->labels_size = 1 << (util_logbase2(t->labels_size) + 1);
3197 t->labels = (struct label *)realloc(t->labels,
3198 t->labels_size * sizeof t->labels[0]);
3199 if (t->labels == NULL) {
3200 static unsigned dummy;
3201 t->error = TRUE;
3202 return &dummy;
3203 }
3204 }
3205
3206 i = t->labels_count++;
3207 t->labels[i].branch_target = branch_target;
3208 return &t->labels[i].token;
3209 }
3210
3211 /**
3212 * Called prior to emitting the TGSI code for each Mesa instruction.
3213 * Allocate additional space for instructions if needed.
3214 * Update the insn[] array so the next Mesa instruction points to
3215 * the next TGSI instruction.
3216 */
3217 static void set_insn_start( struct st_translate *t,
3218 unsigned start )
3219 {
3220 if (t->insn_count + 1 >= t->insn_size) {
3221 t->insn_size = 1 << (util_logbase2(t->insn_size) + 1);
3222 t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof t->insn[0]);
3223 if (t->insn == NULL) {
3224 t->error = TRUE;
3225 return;
3226 }
3227 }
3228
3229 t->insn[t->insn_count++] = start;
3230 }
3231
3232 /**
3233 * Map a Mesa dst register to a TGSI ureg_dst register.
3234 */
3235 static struct ureg_dst
3236 dst_register( struct st_translate *t,
3237 gl_register_file file,
3238 GLuint index )
3239 {
3240 switch( file ) {
3241 case PROGRAM_UNDEFINED:
3242 return ureg_dst_undef();
3243
3244 case PROGRAM_TEMPORARY:
3245 if (ureg_dst_is_undef(t->temps[index]))
3246 t->temps[index] = ureg_DECL_temporary( t->ureg );
3247
3248 return t->temps[index];
3249
3250 case PROGRAM_OUTPUT:
3251 if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ)
3252 t->prevInstWrotePointSize = GL_TRUE;
3253
3254 if (t->procType == TGSI_PROCESSOR_VERTEX)
3255 assert(index < VERT_RESULT_MAX);
3256 else if (t->procType == TGSI_PROCESSOR_FRAGMENT)
3257 assert(index < FRAG_RESULT_MAX);
3258 else
3259 assert(index < GEOM_RESULT_MAX);
3260
3261 assert(t->outputMapping[index] < Elements(t->outputs));
3262
3263 return t->outputs[t->outputMapping[index]];
3264
3265 case PROGRAM_ADDRESS:
3266 return t->address[index];
3267
3268 default:
3269 debug_assert( 0 );
3270 return ureg_dst_undef();
3271 }
3272 }
3273
3274 /**
3275 * Map a Mesa src register to a TGSI ureg_src register.
3276 */
3277 static struct ureg_src
3278 src_register( struct st_translate *t,
3279 gl_register_file file,
3280 GLuint index )
3281 {
3282 switch( file ) {
3283 case PROGRAM_UNDEFINED:
3284 return ureg_src_undef();
3285
3286 case PROGRAM_TEMPORARY:
3287 assert(index >= 0);
3288 assert(index < Elements(t->temps));
3289 if (ureg_dst_is_undef(t->temps[index]))
3290 t->temps[index] = ureg_DECL_temporary( t->ureg );
3291 return ureg_src(t->temps[index]);
3292
3293 case PROGRAM_NAMED_PARAM:
3294 case PROGRAM_ENV_PARAM:
3295 case PROGRAM_LOCAL_PARAM:
3296 case PROGRAM_UNIFORM:
3297 assert(index >= 0);
3298 return t->constants[index];
3299 case PROGRAM_STATE_VAR:
3300 case PROGRAM_CONSTANT: /* ie, immediate */
3301 if (index < 0)
3302 return ureg_DECL_constant( t->ureg, 0 );
3303 else
3304 return t->constants[index];
3305
3306 case PROGRAM_INPUT:
3307 assert(t->inputMapping[index] < Elements(t->inputs));
3308 return t->inputs[t->inputMapping[index]];
3309
3310 case PROGRAM_OUTPUT:
3311 assert(t->outputMapping[index] < Elements(t->outputs));
3312 return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */
3313
3314 case PROGRAM_ADDRESS:
3315 return ureg_src(t->address[index]);
3316
3317 case PROGRAM_SYSTEM_VALUE:
3318 assert(index < Elements(t->systemValues));
3319 return t->systemValues[index];
3320
3321 default:
3322 debug_assert( 0 );
3323 return ureg_src_undef();
3324 }
3325 }
3326
3327 /**
3328 * Create a TGSI ureg_dst register from a Mesa dest register.
3329 */
3330 static struct ureg_dst
3331 translate_dst( struct st_translate *t,
3332 const st_dst_reg *dst_reg, //const struct prog_dst_register *DstReg,
3333 boolean saturate )
3334 {
3335 struct ureg_dst dst = dst_register( t,
3336 dst_reg->file,
3337 dst_reg->index );
3338
3339 dst = ureg_writemask( dst,
3340 dst_reg->writemask );
3341
3342 if (saturate)
3343 dst = ureg_saturate( dst );
3344
3345 if (dst_reg->reladdr != NULL)
3346 dst = ureg_dst_indirect( dst, ureg_src(t->address[0]) );
3347
3348 return dst;
3349 }
3350
3351 /**
3352 * Create a TGSI ureg_src register from a Mesa src register.
3353 */
3354 static struct ureg_src
3355 translate_src( struct st_translate *t,
3356 const st_src_reg *src_reg )
3357 {
3358 struct ureg_src src = src_register( t, src_reg->file, src_reg->index );
3359
3360 src = ureg_swizzle( src,
3361 GET_SWZ( src_reg->swizzle, 0 ) & 0x3,
3362 GET_SWZ( src_reg->swizzle, 1 ) & 0x3,
3363 GET_SWZ( src_reg->swizzle, 2 ) & 0x3,
3364 GET_SWZ( src_reg->swizzle, 3 ) & 0x3);
3365
3366 if ((src_reg->negate & 0xf) == NEGATE_XYZW)
3367 src = ureg_negate(src);
3368
3369 #if 0
3370 // src_reg currently does not have an equivalent to SrcReg->Abs in Mesa IR
3371 if (src_reg->abs)
3372 src = ureg_abs(src);
3373 #endif
3374
3375 if (src_reg->reladdr != NULL) {
3376 /* Normally ureg_src_indirect() would be used here, but a stupid compiler
3377 * bug in g++ makes ureg_src_indirect (an inline C function) erroneously
3378 * set the bit for src.Negate. So we have to do the operation manually
3379 * here to work around the compiler's problems. */
3380 /*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/
3381 struct ureg_src addr = ureg_src(t->address[0]);
3382 src.Indirect = 1;
3383 src.IndirectFile = addr.File;
3384 src.IndirectIndex = addr.Index;
3385 src.IndirectSwizzle = addr.SwizzleX;
3386
3387 if (src_reg->file != PROGRAM_INPUT &&
3388 src_reg->file != PROGRAM_OUTPUT) {
3389 /* If src_reg->index was negative, it was set to zero in
3390 * src_register(). Reassign it now. But don't do this
3391 * for input/output regs since they get remapped while
3392 * const buffers don't.
3393 */
3394 src.Index = src_reg->index;
3395 }
3396 }
3397
3398 return src;
3399 }
3400
3401 static void
3402 compile_tgsi_instruction(struct st_translate *t,
3403 const struct glsl_to_tgsi_instruction *inst)
3404 {
3405 struct ureg_program *ureg = t->ureg;
3406 GLuint i;
3407 struct ureg_dst dst[1];
3408 struct ureg_src src[4];
3409 unsigned num_dst;
3410 unsigned num_src;
3411
3412 num_dst = _mesa_num_inst_dst_regs( inst->op );
3413 num_src = _mesa_num_inst_src_regs( inst->op );
3414
3415 if (num_dst)
3416 dst[0] = translate_dst( t,
3417 &inst->dst,
3418 inst->saturate); // inst->SaturateMode
3419
3420 for (i = 0; i < num_src; i++)
3421 src[i] = translate_src( t, &inst->src[i] );
3422
3423 switch( inst->op ) {
3424 case OPCODE_SWZ:
3425 // TODO: copy emit_swz function from st_mesa_to_tgsi.c
3426 //emit_swz( t, dst[0], &inst->src[0] );
3427 assert(!"OPCODE_SWZ");
3428 return;
3429
3430 case OPCODE_BGNLOOP:
3431 case OPCODE_CAL:
3432 case OPCODE_ELSE:
3433 case OPCODE_ENDLOOP:
3434 case OPCODE_IF:
3435 debug_assert(num_dst == 0);
3436 ureg_label_insn( ureg,
3437 translate_opcode( inst->op ),
3438 src, num_src,
3439 get_label( t,
3440 inst->op == OPCODE_CAL ? inst->function->sig_id : 0 ));
3441 return;
3442
3443 case OPCODE_TEX:
3444 case OPCODE_TXB:
3445 case OPCODE_TXD:
3446 case OPCODE_TXL:
3447 case OPCODE_TXP:
3448 src[num_src++] = t->samplers[inst->sampler];
3449 ureg_tex_insn( ureg,
3450 translate_opcode( inst->op ),
3451 dst, num_dst,
3452 translate_texture_target( inst->tex_target,
3453 inst->tex_shadow ),
3454 src, num_src );
3455 return;
3456
3457 case OPCODE_SCS:
3458 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY );
3459 ureg_insn( ureg,
3460 translate_opcode( inst->op ),
3461 dst, num_dst,
3462 src, num_src );
3463 break;
3464
3465 case OPCODE_XPD:
3466 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ );
3467 ureg_insn( ureg,
3468 translate_opcode( inst->op ),
3469 dst, num_dst,
3470 src, num_src );
3471 break;
3472
3473 case OPCODE_NOISE1:
3474 case OPCODE_NOISE2:
3475 case OPCODE_NOISE3:
3476 case OPCODE_NOISE4:
3477 /* At some point, a motivated person could add a better
3478 * implementation of noise. Currently not even the nvidia
3479 * binary drivers do anything more than this. In any case, the
3480 * place to do this is in the GL state tracker, not the poor
3481 * driver.
3482 */
3483 ureg_MOV( ureg, dst[0], ureg_imm1f(ureg, 0.5) );
3484 break;
3485
3486 case OPCODE_DDY:
3487 // TODO: copy emit_ddy() function from st_mesa_to_tgsi.c
3488 assert(!"OPCODE_DDY");
3489 //emit_ddy( t, dst[0], &inst->src[0] );
3490 break;
3491
3492 default:
3493 ureg_insn( ureg,
3494 translate_opcode( inst->op ),
3495 dst, num_dst,
3496 src, num_src );
3497 break;
3498 }
3499 }
3500
3501 /**
3502 * Emit the TGSI instructions to adjust the WPOS pixel center convention
3503 * Basically, add (adjX, adjY) to the fragment position.
3504 */
3505 static void
3506 emit_adjusted_wpos( struct st_translate *t,
3507 const struct gl_program *program,
3508 GLfloat adjX, GLfloat adjY)
3509 {
3510 struct ureg_program *ureg = t->ureg;
3511 struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg);
3512 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
3513
3514 /* Note that we bias X and Y and pass Z and W through unchanged.
3515 * The shader might also use gl_FragCoord.w and .z.
3516 */
3517 ureg_ADD(ureg, wpos_temp, wpos_input,
3518 ureg_imm4f(ureg, adjX, adjY, 0.0f, 0.0f));
3519
3520 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
3521 }
3522
3523
3524 /**
3525 * Emit the TGSI instructions for inverting the WPOS y coordinate.
3526 * This code is unavoidable because it also depends on whether
3527 * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
3528 */
3529 static void
3530 emit_wpos_inversion( struct st_translate *t,
3531 const struct gl_program *program,
3532 boolean invert)
3533 {
3534 struct ureg_program *ureg = t->ureg;
3535
3536 /* Fragment program uses fragment position input.
3537 * Need to replace instances of INPUT[WPOS] with temp T
3538 * where T = INPUT[WPOS] by y is inverted.
3539 */
3540 static const gl_state_index wposTransformState[STATE_LENGTH]
3541 = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM,
3542 (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 };
3543
3544 /* XXX: note we are modifying the incoming shader here! Need to
3545 * do this before emitting the constant decls below, or this
3546 * will be missed:
3547 */
3548 unsigned wposTransConst = _mesa_add_state_reference(program->Parameters,
3549 wposTransformState);
3550
3551 struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst );
3552 struct ureg_dst wpos_temp;
3553 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
3554
3555 /* MOV wpos_temp, input[wpos]
3556 */
3557 if (wpos_input.File == TGSI_FILE_TEMPORARY)
3558 wpos_temp = ureg_dst(wpos_input);
3559 else {
3560 wpos_temp = ureg_DECL_temporary( ureg );
3561 ureg_MOV( ureg, wpos_temp, wpos_input );
3562 }
3563
3564 if (invert) {
3565 /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
3566 */
3567 ureg_MAD( ureg,
3568 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
3569 wpos_input,
3570 ureg_scalar(wpostrans, 0),
3571 ureg_scalar(wpostrans, 1));
3572 } else {
3573 /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
3574 */
3575 ureg_MAD( ureg,
3576 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
3577 wpos_input,
3578 ureg_scalar(wpostrans, 2),
3579 ureg_scalar(wpostrans, 3));
3580 }
3581
3582 /* Use wpos_temp as position input from here on:
3583 */
3584 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
3585 }
3586
3587
3588 /**
3589 * Emit fragment position/ooordinate code.
3590 */
3591 static void
3592 emit_wpos(struct st_context *st,
3593 struct st_translate *t,
3594 const struct gl_program *program,
3595 struct ureg_program *ureg)
3596 {
3597 const struct gl_fragment_program *fp =
3598 (const struct gl_fragment_program *) program;
3599 struct pipe_screen *pscreen = st->pipe->screen;
3600 boolean invert = FALSE;
3601
3602 if (fp->OriginUpperLeft) {
3603 /* Fragment shader wants origin in upper-left */
3604 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
3605 /* the driver supports upper-left origin */
3606 }
3607 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
3608 /* the driver supports lower-left origin, need to invert Y */
3609 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
3610 invert = TRUE;
3611 }
3612 else
3613 assert(0);
3614 }
3615 else {
3616 /* Fragment shader wants origin in lower-left */
3617 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
3618 /* the driver supports lower-left origin */
3619 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
3620 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
3621 /* the driver supports upper-left origin, need to invert Y */
3622 invert = TRUE;
3623 else
3624 assert(0);
3625 }
3626
3627 if (fp->PixelCenterInteger) {
3628 /* Fragment shader wants pixel center integer */
3629 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER))
3630 /* the driver supports pixel center integer */
3631 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3632 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER))
3633 /* the driver supports pixel center half integer, need to bias X,Y */
3634 emit_adjusted_wpos(t, program, 0.5f, invert ? 0.5f : -0.5f);
3635 else
3636 assert(0);
3637 }
3638 else {
3639 /* Fragment shader wants pixel center half integer */
3640 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
3641 /* the driver supports pixel center half integer */
3642 }
3643 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
3644 /* the driver supports pixel center integer, need to bias X,Y */
3645 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
3646 emit_adjusted_wpos(t, program, 0.5f, invert ? -0.5f : 0.5f);
3647 }
3648 else
3649 assert(0);
3650 }
3651
3652 /* we invert after adjustment so that we avoid the MOV to temporary,
3653 * and reuse the adjustment ADD instead */
3654 emit_wpos_inversion(t, program, invert);
3655 }
3656
3657 /**
3658 * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
3659 * \param program the program to translate
3660 * \param numInputs number of input registers used
3661 * \param inputMapping maps Mesa fragment program inputs to TGSI generic
3662 * input indexes
3663 * \param inputSemanticName the TGSI_SEMANTIC flag for each input
3664 * \param inputSemanticIndex the semantic index (ex: which texcoord) for
3665 * each input
3666 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
3667 * \param numOutputs number of output registers used
3668 * \param outputMapping maps Mesa fragment program outputs to TGSI
3669 * generic outputs
3670 * \param outputSemanticName the TGSI_SEMANTIC flag for each output
3671 * \param outputSemanticIndex the semantic index (ex: which texcoord) for
3672 * each output
3673 *
3674 * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
3675 */
3676 extern "C" enum pipe_error
3677 st_translate_program(
3678 struct gl_context *ctx,
3679 uint procType,
3680 struct ureg_program *ureg,
3681 glsl_to_tgsi_visitor *program,
3682 const struct gl_program *proginfo,
3683 GLuint numInputs,
3684 const GLuint inputMapping[],
3685 const ubyte inputSemanticName[],
3686 const ubyte inputSemanticIndex[],
3687 const GLuint interpMode[],
3688 GLuint numOutputs,
3689 const GLuint outputMapping[],
3690 const ubyte outputSemanticName[],
3691 const ubyte outputSemanticIndex[],
3692 boolean passthrough_edgeflags )
3693 {
3694 struct st_translate translate, *t;
3695 unsigned i;
3696 enum pipe_error ret = PIPE_OK;
3697
3698 assert(numInputs <= Elements(t->inputs));
3699 assert(numOutputs <= Elements(t->outputs));
3700
3701 t = &translate;
3702 memset(t, 0, sizeof *t);
3703
3704 t->procType = procType;
3705 t->inputMapping = inputMapping;
3706 t->outputMapping = outputMapping;
3707 t->ureg = ureg;
3708 t->pointSizeOutIndex = -1;
3709 t->prevInstWrotePointSize = GL_FALSE;
3710
3711 /*
3712 * Declare input attributes.
3713 */
3714 if (procType == TGSI_PROCESSOR_FRAGMENT) {
3715 for (i = 0; i < numInputs; i++) {
3716 t->inputs[i] = ureg_DECL_fs_input(ureg,
3717 inputSemanticName[i],
3718 inputSemanticIndex[i],
3719 interpMode[i]);
3720 }
3721
3722 if (proginfo->InputsRead & FRAG_BIT_WPOS) {
3723 /* Must do this after setting up t->inputs, and before
3724 * emitting constant references, below:
3725 */
3726 printf("FRAG_BIT_WPOS\n");
3727 emit_wpos(st_context(ctx), t, proginfo, ureg);
3728 }
3729
3730 if (proginfo->InputsRead & FRAG_BIT_FACE) {
3731 // TODO: uncomment
3732 printf("FRAG_BIT_FACE\n");
3733 //emit_face_var( t, program );
3734 }
3735
3736 /*
3737 * Declare output attributes.
3738 */
3739 for (i = 0; i < numOutputs; i++) {
3740 switch (outputSemanticName[i]) {
3741 case TGSI_SEMANTIC_POSITION:
3742 t->outputs[i] = ureg_DECL_output( ureg,
3743 TGSI_SEMANTIC_POSITION, /* Z / Depth */
3744 outputSemanticIndex[i] );
3745
3746 t->outputs[i] = ureg_writemask( t->outputs[i],
3747 TGSI_WRITEMASK_Z );
3748 break;
3749 case TGSI_SEMANTIC_STENCIL:
3750 t->outputs[i] = ureg_DECL_output( ureg,
3751 TGSI_SEMANTIC_STENCIL, /* Stencil */
3752 outputSemanticIndex[i] );
3753 t->outputs[i] = ureg_writemask( t->outputs[i],
3754 TGSI_WRITEMASK_Y );
3755 break;
3756 case TGSI_SEMANTIC_COLOR:
3757 t->outputs[i] = ureg_DECL_output( ureg,
3758 TGSI_SEMANTIC_COLOR,
3759 outputSemanticIndex[i] );
3760 break;
3761 default:
3762 debug_assert(0);
3763 return PIPE_ERROR_BAD_INPUT;
3764 }
3765 }
3766 }
3767 else if (procType == TGSI_PROCESSOR_GEOMETRY) {
3768 for (i = 0; i < numInputs; i++) {
3769 t->inputs[i] = ureg_DECL_gs_input(ureg,
3770 i,
3771 inputSemanticName[i],
3772 inputSemanticIndex[i]);
3773 }
3774
3775 for (i = 0; i < numOutputs; i++) {
3776 t->outputs[i] = ureg_DECL_output( ureg,
3777 outputSemanticName[i],
3778 outputSemanticIndex[i] );
3779 }
3780 }
3781 else {
3782 assert(procType == TGSI_PROCESSOR_VERTEX);
3783
3784 for (i = 0; i < numInputs; i++) {
3785 t->inputs[i] = ureg_DECL_vs_input(ureg, i);
3786 }
3787
3788 for (i = 0; i < numOutputs; i++) {
3789 t->outputs[i] = ureg_DECL_output( ureg,
3790 outputSemanticName[i],
3791 outputSemanticIndex[i] );
3792 if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) {
3793 /* Writing to the point size result register requires special
3794 * handling to implement clamping.
3795 */
3796 static const gl_state_index pointSizeClampState[STATE_LENGTH]
3797 = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 };
3798 /* XXX: note we are modifying the incoming shader here! Need to
3799 * do this before emitting the constant decls below, or this
3800 * will be missed.
3801 * XXX: depends on "Parameters" field specific to Mesa IR
3802 */
3803 unsigned pointSizeClampConst =
3804 _mesa_add_state_reference(proginfo->Parameters,
3805 pointSizeClampState);
3806 struct ureg_dst psizregtemp = ureg_DECL_temporary( ureg );
3807 t->pointSizeConst = ureg_DECL_constant( ureg, pointSizeClampConst );
3808 t->pointSizeResult = t->outputs[i];
3809 t->pointSizeOutIndex = i;
3810 t->outputs[i] = psizregtemp;
3811 }
3812 }
3813 /*if (passthrough_edgeflags)
3814 emit_edgeflags( t, program ); */ // TODO: uncomment
3815 }
3816
3817 /* Declare address register.
3818 */
3819 if (program->num_address_regs > 0) {
3820 debug_assert( program->num_address_regs == 1 );
3821 t->address[0] = ureg_DECL_address( ureg );
3822 }
3823
3824 /* Declare misc input registers
3825 */
3826 {
3827 GLbitfield sysInputs = proginfo->SystemValuesRead;
3828 unsigned numSys = 0;
3829 for (i = 0; sysInputs; i++) {
3830 if (sysInputs & (1 << i)) {
3831 unsigned semName = mesa_sysval_to_semantic[i];
3832 t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0);
3833 numSys++;
3834 sysInputs &= ~(1 << i);
3835 }
3836 }
3837 }
3838
3839 if (program->indirect_addr_temps) {
3840 /* If temps are accessed with indirect addressing, declare temporaries
3841 * in sequential order. Else, we declare them on demand elsewhere.
3842 * (Note: the number of temporaries is equal to program->next_temp)
3843 */
3844 for (i = 0; i < (unsigned)program->next_temp; i++) {
3845 /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
3846 t->temps[i] = ureg_DECL_temporary( t->ureg );
3847 }
3848 }
3849
3850 /* Emit constants and immediates. Mesa uses a single index space
3851 * for these, so we put all the translated regs in t->constants.
3852 * XXX: this entire if block depends on proginfo->Parameters from Mesa IR
3853 */
3854 if (proginfo->Parameters) {
3855 t->constants = (struct ureg_src *)CALLOC( proginfo->Parameters->NumParameters * sizeof t->constants[0] );
3856 if (t->constants == NULL) {
3857 ret = PIPE_ERROR_OUT_OF_MEMORY;
3858 goto out;
3859 }
3860
3861 for (i = 0; i < proginfo->Parameters->NumParameters; i++) {
3862 switch (proginfo->Parameters->Parameters[i].Type) {
3863 case PROGRAM_ENV_PARAM:
3864 case PROGRAM_LOCAL_PARAM:
3865 case PROGRAM_STATE_VAR:
3866 case PROGRAM_NAMED_PARAM:
3867 case PROGRAM_UNIFORM:
3868 t->constants[i] = ureg_DECL_constant( ureg, i );
3869 break;
3870
3871 /* Emit immediates only when there's no indirect addressing of
3872 * the const buffer.
3873 * FIXME: Be smarter and recognize param arrays:
3874 * indirect addressing is only valid within the referenced
3875 * array.
3876 */
3877 case PROGRAM_CONSTANT:
3878 if (program->indirect_addr_consts)
3879 t->constants[i] = ureg_DECL_constant( ureg, i );
3880 else
3881 t->constants[i] =
3882 ureg_DECL_immediate( ureg,
3883 proginfo->Parameters->ParameterValues[i],
3884 4 );
3885 break;
3886 default:
3887 break;
3888 }
3889 }
3890 }
3891
3892 /* texture samplers */
3893 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
3894 if (program->samplers_used & (1 << i)) {
3895 t->samplers[i] = ureg_DECL_sampler( ureg, i );
3896 }
3897 }
3898
3899 /* Emit each instruction in turn:
3900 */
3901 foreach_iter(exec_list_iterator, iter, program->instructions) {
3902 set_insn_start( t, ureg_get_instruction_number( ureg ));
3903 compile_tgsi_instruction( t, (glsl_to_tgsi_instruction *)iter.get() );
3904
3905 if (t->prevInstWrotePointSize && proginfo->Id) {
3906 /* The previous instruction wrote to the (fake) vertex point size
3907 * result register. Now we need to clamp that value to the min/max
3908 * point size range, putting the result into the real point size
3909 * register.
3910 * Note that we can't do this easily at the end of program due to
3911 * possible early return.
3912 */
3913 set_insn_start( t, ureg_get_instruction_number( ureg ));
3914 ureg_MAX( t->ureg,
3915 ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X),
3916 ureg_src(t->outputs[t->pointSizeOutIndex]),
3917 ureg_swizzle(t->pointSizeConst, 1,1,1,1));
3918 ureg_MIN( t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X),
3919 ureg_src(t->outputs[t->pointSizeOutIndex]),
3920 ureg_swizzle(t->pointSizeConst, 2,2,2,2));
3921 }
3922 t->prevInstWrotePointSize = GL_FALSE;
3923 }
3924
3925 /* Fix up all emitted labels:
3926 */
3927 for (i = 0; i < t->labels_count; i++) {
3928 ureg_fixup_label( ureg,
3929 t->labels[i].token,
3930 t->insn[t->labels[i].branch_target] );
3931 }
3932
3933 out:
3934 FREE(t->insn);
3935 FREE(t->labels);
3936 FREE(t->constants);
3937
3938 if (t->error) {
3939 debug_printf("%s: translate error flag set\n", __FUNCTION__);
3940 }
3941
3942 return ret;
3943 }
3944 /* ----------------------------- End TGSI code ------------------------------ */
3945
3946 /**
3947 * Convert a shader's GLSL IR into a Mesa gl_program, although without
3948 * generating Mesa IR.
3949 */
3950 static struct gl_program *
3951 get_mesa_program(struct gl_context *ctx,
3952 struct gl_shader_program *shader_program,
3953 struct gl_shader *shader)
3954 {
3955 glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor();
3956 struct gl_program *prog;
3957 GLenum target;
3958 const char *target_string;
3959 GLboolean progress;
3960 struct gl_shader_compiler_options *options =
3961 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)];
3962
3963 switch (shader->Type) {
3964 case GL_VERTEX_SHADER:
3965 target = GL_VERTEX_PROGRAM_ARB;
3966 target_string = "vertex";
3967 break;
3968 case GL_FRAGMENT_SHADER:
3969 target = GL_FRAGMENT_PROGRAM_ARB;
3970 target_string = "fragment";
3971 break;
3972 case GL_GEOMETRY_SHADER:
3973 target = GL_GEOMETRY_PROGRAM_NV;
3974 target_string = "geometry";
3975 break;
3976 default:
3977 assert(!"should not be reached");
3978 return NULL;
3979 }
3980
3981 validate_ir_tree(shader->ir);
3982
3983 prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
3984 if (!prog)
3985 return NULL;
3986 prog->Parameters = _mesa_new_parameter_list();
3987 prog->Varying = _mesa_new_parameter_list();
3988 prog->Attributes = _mesa_new_parameter_list();
3989 v->ctx = ctx;
3990 v->prog = prog;
3991 v->shader_program = shader_program;
3992 v->options = options;
3993
3994 add_uniforms_to_parameters_list(shader_program, shader, prog);
3995
3996 /* Emit Mesa IR for main(). */
3997 visit_exec_list(shader->ir, v);
3998 v->emit(NULL, OPCODE_END);
3999
4000 /* Now emit bodies for any functions that were used. */
4001 do {
4002 progress = GL_FALSE;
4003
4004 foreach_iter(exec_list_iterator, iter, v->function_signatures) {
4005 function_entry *entry = (function_entry *)iter.get();
4006
4007 if (!entry->bgn_inst) {
4008 v->current_function = entry;
4009
4010 entry->bgn_inst = v->emit(NULL, OPCODE_BGNSUB);
4011 entry->bgn_inst->function = entry;
4012
4013 visit_exec_list(&entry->sig->body, v);
4014
4015 glsl_to_tgsi_instruction *last;
4016 last = (glsl_to_tgsi_instruction *)v->instructions.get_tail();
4017 if (last->op != OPCODE_RET)
4018 v->emit(NULL, OPCODE_RET);
4019
4020 glsl_to_tgsi_instruction *end;
4021 end = v->emit(NULL, OPCODE_ENDSUB);
4022 end->function = entry;
4023
4024 progress = GL_TRUE;
4025 }
4026 }
4027 } while (progress);
4028
4029 #if 0
4030 /* Print out some information (for debugging purposes) used by the
4031 * optimization passes. */
4032 for (i=0; i < v->next_temp; i++) {
4033 int fr = v->get_first_temp_read(i);
4034 int fw = v->get_first_temp_write(i);
4035 int lr = v->get_last_temp_read(i);
4036 int lw = v->get_last_temp_write(i);
4037
4038 printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw);
4039 assert(fw <= fr);
4040 }
4041 #endif
4042
4043 /* Remove reads to output registers, and to varyings in vertex shaders. */
4044 v->remove_output_reads(PROGRAM_OUTPUT);
4045 if (target == GL_VERTEX_PROGRAM_ARB)
4046 v->remove_output_reads(PROGRAM_VARYING);
4047
4048 /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
4049 v->copy_propagate();
4050 v->eliminate_dead_code();
4051 v->merge_registers();
4052 v->renumber_registers();
4053
4054 if (ctx->Shader.Flags & GLSL_DUMP) {
4055 printf("\n");
4056 printf("GLSL IR for linked %s program %d:\n", target_string,
4057 shader_program->Name);
4058 _mesa_print_ir(shader->ir, NULL);
4059 printf("\n");
4060 printf("\n");
4061 }
4062
4063 prog->Instructions = NULL;
4064 prog->NumInstructions = 0;
4065
4066 do_set_program_inouts(shader->ir, prog);
4067 count_resources(v);
4068
4069 check_resources(ctx, shader_program, v, prog);
4070
4071 _mesa_reference_program(ctx, &shader->Program, prog);
4072
4073 struct st_vertex_program *stvp;
4074 struct st_fragment_program *stfp;
4075 struct st_geometry_program *stgp;
4076
4077 switch (shader->Type) {
4078 case GL_VERTEX_SHADER:
4079 stvp = (struct st_vertex_program *)prog;
4080 stvp->glsl_to_tgsi = v;
4081 break;
4082 case GL_FRAGMENT_SHADER:
4083 stfp = (struct st_fragment_program *)prog;
4084 stfp->glsl_to_tgsi = v;
4085 break;
4086 case GL_GEOMETRY_SHADER:
4087 stgp = (struct st_geometry_program *)prog;
4088 stgp->glsl_to_tgsi = v;
4089 break;
4090 default:
4091 assert(!"should not be reached");
4092 return NULL;
4093 }
4094
4095 return prog;
4096 }
4097
4098 extern "C" {
4099
4100 struct gl_shader *
4101 st_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
4102 {
4103 struct gl_shader *shader;
4104 assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER ||
4105 type == GL_GEOMETRY_SHADER_ARB);
4106 shader = rzalloc(NULL, struct gl_shader);
4107 if (shader) {
4108 shader->Type = type;
4109 shader->Name = name;
4110 _mesa_init_shader(ctx, shader);
4111 }
4112 return shader;
4113 }
4114
4115 struct gl_shader_program *
4116 st_new_shader_program(struct gl_context *ctx, GLuint name)
4117 {
4118 struct gl_shader_program *shProg;
4119 shProg = rzalloc(NULL, struct gl_shader_program);
4120 if (shProg) {
4121 shProg->Name = name;
4122 _mesa_init_shader_program(ctx, shProg);
4123 }
4124 return shProg;
4125 }
4126
4127 /**
4128 * Link a shader.
4129 * Called via ctx->Driver.LinkShader()
4130 * This actually involves converting GLSL IR into Mesa gl_programs with
4131 * code lowering and other optimizations.
4132 */
4133 GLboolean
4134 st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
4135 {
4136 assert(prog->LinkStatus);
4137
4138 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
4139 if (prog->_LinkedShaders[i] == NULL)
4140 continue;
4141
4142 bool progress;
4143 exec_list *ir = prog->_LinkedShaders[i]->ir;
4144 const struct gl_shader_compiler_options *options =
4145 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)];
4146
4147 do {
4148 progress = false;
4149
4150 /* Lowering */
4151 do_mat_op_to_vec(ir);
4152 lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
4153 | LOG_TO_LOG2
4154 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
4155
4156 progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
4157
4158 progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress;
4159
4160 progress = lower_quadop_vector(ir, true) || progress;
4161
4162 if (options->EmitNoIfs) {
4163 progress = lower_discard(ir) || progress;
4164 progress = lower_if_to_cond_assign(ir) || progress;
4165 }
4166
4167 if (options->EmitNoNoise)
4168 progress = lower_noise(ir) || progress;
4169
4170 /* If there are forms of indirect addressing that the driver
4171 * cannot handle, perform the lowering pass.
4172 */
4173 if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput
4174 || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform)
4175 progress =
4176 lower_variable_index_to_cond_assign(ir,
4177 options->EmitNoIndirectInput,
4178 options->EmitNoIndirectOutput,
4179 options->EmitNoIndirectTemp,
4180 options->EmitNoIndirectUniform)
4181 || progress;
4182
4183 progress = do_vec_index_to_cond_assign(ir) || progress;
4184 } while (progress);
4185
4186 validate_ir_tree(ir);
4187 }
4188
4189 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
4190 struct gl_program *linked_prog;
4191
4192 if (prog->_LinkedShaders[i] == NULL)
4193 continue;
4194
4195 linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
4196
4197 if (linked_prog) {
4198 bool ok = true;
4199
4200 switch (prog->_LinkedShaders[i]->Type) {
4201 case GL_VERTEX_SHADER:
4202 _mesa_reference_vertprog(ctx, &prog->VertexProgram,
4203 (struct gl_vertex_program *)linked_prog);
4204 ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB,
4205 linked_prog);
4206 break;
4207 case GL_FRAGMENT_SHADER:
4208 _mesa_reference_fragprog(ctx, &prog->FragmentProgram,
4209 (struct gl_fragment_program *)linked_prog);
4210 ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB,
4211 linked_prog);
4212 break;
4213 case GL_GEOMETRY_SHADER:
4214 _mesa_reference_geomprog(ctx, &prog->GeometryProgram,
4215 (struct gl_geometry_program *)linked_prog);
4216 ok = ctx->Driver.ProgramStringNotify(ctx, GL_GEOMETRY_PROGRAM_NV,
4217 linked_prog);
4218 break;
4219 }
4220 if (!ok) {
4221 return GL_FALSE;
4222 }
4223 }
4224
4225 _mesa_reference_program(ctx, &linked_prog, NULL);
4226 }
4227
4228 return GL_TRUE;
4229 }
4230
4231
4232 /**
4233 * Link a GLSL shader program. Called via glLinkProgram().
4234 */
4235 void
4236 st_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
4237 {
4238 unsigned int i;
4239
4240 _mesa_clear_shader_program_data(ctx, prog);
4241
4242 prog->LinkStatus = GL_TRUE;
4243
4244 for (i = 0; i < prog->NumShaders; i++) {
4245 if (!prog->Shaders[i]->CompileStatus) {
4246 fail_link(prog, "linking with uncompiled shader");
4247 prog->LinkStatus = GL_FALSE;
4248 }
4249 }
4250
4251 prog->Varying = _mesa_new_parameter_list();
4252 _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL);
4253 _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL);
4254 _mesa_reference_geomprog(ctx, &prog->GeometryProgram, NULL);
4255
4256 if (prog->LinkStatus) {
4257 link_shaders(ctx, prog);
4258 }
4259
4260 if (prog->LinkStatus) {
4261 if (!ctx->Driver.LinkShader(ctx, prog)) {
4262 prog->LinkStatus = GL_FALSE;
4263 }
4264 }
4265
4266 set_uniform_initializers(ctx, prog);
4267
4268 if (ctx->Shader.Flags & GLSL_DUMP) {
4269 if (!prog->LinkStatus) {
4270 printf("GLSL shader program %d failed to link\n", prog->Name);
4271 }
4272
4273 if (prog->InfoLog && prog->InfoLog[0] != 0) {
4274 printf("GLSL shader program %d info log:\n", prog->Name);
4275 printf("%s\n", prog->InfoLog);
4276 }
4277 }
4278 }
4279
4280 } /* extern "C" */