glsl_to_tgsi: make coding style more consistent
[mesa.git] / src / mesa / state_tracker / st_glsl_to_tgsi.cpp
1 /*
2 * Copyright (C) 2005-2007 Brian Paul All Rights Reserved.
3 * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
4 * Copyright © 2010 Intel Corporation
5 * Copyright © 2011 Bryan Cain
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 * DEALINGS IN THE SOFTWARE.
25 */
26
/**
 * \file st_glsl_to_tgsi.cpp
 *
 * Translate GLSL IR to TGSI.
 */
32
33 #include <stdio.h>
34 #include "main/compiler.h"
35 #include "ir.h"
36 #include "ir_visitor.h"
37 #include "ir_print_visitor.h"
38 #include "ir_expression_flattening.h"
39 #include "glsl_types.h"
40 #include "glsl_parser_extras.h"
41 #include "../glsl/program.h"
42 #include "ir_optimization.h"
43 #include "ast.h"
44
45 extern "C" {
46 #include "main/mtypes.h"
47 #include "main/shaderapi.h"
48 #include "main/shaderobj.h"
49 #include "main/uniforms.h"
50 #include "program/hash_table.h"
51 #include "program/prog_instruction.h"
52 #include "program/prog_optimize.h"
53 #include "program/prog_print.h"
54 #include "program/program.h"
55 #include "program/prog_uniform.h"
56 #include "program/prog_parameter.h"
57 #include "program/sampler.h"
58
59 #include "pipe/p_compiler.h"
60 #include "pipe/p_context.h"
61 #include "pipe/p_screen.h"
62 #include "pipe/p_shader_tokens.h"
63 #include "pipe/p_state.h"
64 #include "util/u_math.h"
65 #include "tgsi/tgsi_ureg.h"
66 #include "tgsi/tgsi_info.h"
67 #include "st_context.h"
68 #include "st_program.h"
69 #include "st_glsl_to_tgsi.h"
70 #include "st_mesa_to_tgsi.h"
71 }
72
/** Register-file value used for immediates; aliases PROGRAM_FILE_MAX. */
#define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX
/** Bitmask with one bit set for each constant-like register file. */
#define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) |  \
                           (1 << PROGRAM_ENV_PARAM) |    \
                           (1 << PROGRAM_STATE_VAR) |    \
                           (1 << PROGRAM_NAMED_PARAM) |  \
                           (1 << PROGRAM_CONSTANT) |     \
                           (1 << PROGRAM_UNIFORM))

/* NOTE(review): presumably the maximum number of temporaries tracked by the
 * optimization passes -- no use is visible in this part of the file, confirm.
 */
#define MAX_TEMPS         4096
82
83 class st_src_reg;
84 class st_dst_reg;
85
86 static int swizzle_for_size(int size);
87
88 /**
89 * This struct is a corresponding struct to TGSI ureg_src.
90 */
91 class st_src_reg {
92 public:
93 st_src_reg(gl_register_file file, int index, const glsl_type *type)
94 {
95 this->file = file;
96 this->index = index;
97 if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
98 this->swizzle = swizzle_for_size(type->vector_elements);
99 else
100 this->swizzle = SWIZZLE_XYZW;
101 this->negate = 0;
102 this->type = type ? type->base_type : GLSL_TYPE_ERROR;
103 this->reladdr = NULL;
104 }
105
106 st_src_reg(gl_register_file file, int index, int type)
107 {
108 this->type = type;
109 this->file = file;
110 this->index = index;
111 this->swizzle = SWIZZLE_XYZW;
112 this->negate = 0;
113 this->reladdr = NULL;
114 }
115
116 st_src_reg()
117 {
118 this->type = GLSL_TYPE_ERROR;
119 this->file = PROGRAM_UNDEFINED;
120 this->index = 0;
121 this->swizzle = 0;
122 this->negate = 0;
123 this->reladdr = NULL;
124 }
125
126 explicit st_src_reg(st_dst_reg reg);
127
128 gl_register_file file; /**< PROGRAM_* from Mesa */
129 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
130 GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
131 int negate; /**< NEGATE_XYZW mask from mesa */
132 int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
133 /** Register index should be offset by the integer in this reg. */
134 st_src_reg *reladdr;
135 };
136
137 class st_dst_reg {
138 public:
139 st_dst_reg(gl_register_file file, int writemask, int type)
140 {
141 this->file = file;
142 this->index = 0;
143 this->writemask = writemask;
144 this->cond_mask = COND_TR;
145 this->reladdr = NULL;
146 this->type = type;
147 }
148
149 st_dst_reg()
150 {
151 this->type = GLSL_TYPE_ERROR;
152 this->file = PROGRAM_UNDEFINED;
153 this->index = 0;
154 this->writemask = 0;
155 this->cond_mask = COND_TR;
156 this->reladdr = NULL;
157 }
158
159 explicit st_dst_reg(st_src_reg reg);
160
161 gl_register_file file; /**< PROGRAM_* from Mesa */
162 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
163 int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
164 GLuint cond_mask:4;
165 int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
166 /** Register index should be offset by the integer in this reg. */
167 st_src_reg *reladdr;
168 };
169
170 st_src_reg::st_src_reg(st_dst_reg reg)
171 {
172 this->type = reg.type;
173 this->file = reg.file;
174 this->index = reg.index;
175 this->swizzle = SWIZZLE_XYZW;
176 this->negate = 0;
177 this->reladdr = NULL;
178 }
179
180 st_dst_reg::st_dst_reg(st_src_reg reg)
181 {
182 this->type = reg.type;
183 this->file = reg.file;
184 this->index = reg.index;
185 this->writemask = WRITEMASK_XYZW;
186 this->cond_mask = COND_TR;
187 this->reladdr = reg.reladdr;
188 }
189
190 class glsl_to_tgsi_instruction : public exec_node {
191 public:
192 /* Callers of this ralloc-based new need not call delete. It's
193 * easier to just ralloc_free 'ctx' (or any of its ancestors). */
194 static void* operator new(size_t size, void *ctx)
195 {
196 void *node;
197
198 node = rzalloc_size(ctx, size);
199 assert(node != NULL);
200
201 return node;
202 }
203
204 unsigned op;
205 st_dst_reg dst;
206 st_src_reg src[3];
207 /** Pointer to the ir source this tree came from for debugging */
208 ir_instruction *ir;
209 GLboolean cond_update;
210 bool saturate;
211 int sampler; /**< sampler index */
212 int tex_target; /**< One of TEXTURE_*_INDEX */
213 GLboolean tex_shadow;
214 int dead_mask; /**< Used in dead code elimination */
215
216 class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
217 };
218
219 class variable_storage : public exec_node {
220 public:
221 variable_storage(ir_variable *var, gl_register_file file, int index)
222 : file(file), index(index), var(var)
223 {
224 /* empty */
225 }
226
227 gl_register_file file;
228 int index;
229 ir_variable *var; /* variable that maps to this, if any */
230 };
231
/**
 * Per-signature bookkeeping record; kept on the visitor's
 * function_signatures list.
 */
class function_entry : public exec_node {
public:
   /** The GLSL IR signature this entry describes. */
   ir_function_signature *sig;

   /**
    * Identifier of this function signature used by the program.
    *
    * At the point that Mesa instructions for function calls are
    * generated, we don't know the address of the first instruction of
    * the function body.  So we make the BranchTarget that is called a
    * small integer and rewrite them during set_branchtargets().
    */
   int sig_id;

   /**
    * Pointer to first instruction of the function body.
    *
    * Set during function body emits after main() is processed.
    */
   glsl_to_tgsi_instruction *bgn_inst;

   /**
    * Index of the first instruction of the function body in actual
    * Mesa IR.
    *
    * Set after conversion from glsl_to_tgsi_instruction to prog_instruction.
    */
   int inst;

   /** Storage for the return value. */
   st_src_reg return_reg;
};
264
/**
 * IR visitor that walks GLSL IR and appends glsl_to_tgsi_instruction nodes
 * to its \c instructions list.
 */
class glsl_to_tgsi_visitor : public ir_visitor {
public:
   glsl_to_tgsi_visitor();
   ~glsl_to_tgsi_visitor();

   function_entry *current_function;

   struct gl_context *ctx;
   /** Program being generated; its Target and Parameters are consulted. */
   struct gl_program *prog;
   /** Shader program that receives link failures reported via fail_link(). */
   struct gl_shader_program *shader_program;
   struct gl_shader_compiler_options *options;
   /** Parameter list the st_src_reg_for_*() helpers add constants to. */
   struct gl_program_parameter_list *immediates;

   /** Index of the next unallocated temporary register. */
   int next_temp;

   /** Set to 1 by emit() once an ARL instruction has been emitted. */
   int num_address_regs;
   int samplers_used;
   /** Set by emit() when a temporary is accessed with relative addressing. */
   bool indirect_addr_temps;
   /** Set by emit() when a constant file is accessed with relative addressing. */
   bool indirect_addr_consts;

   /** GLSL version; typed (non-float) registers are used from 130 on. */
   int glsl_version;

   variable_storage *find_variable_storage(ir_variable *var);

   function_entry *get_function_signature(ir_function_signature *sig);

   st_src_reg get_temp(const glsl_type *type);
   void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);

   st_src_reg st_src_reg_for_float(float val);
   st_src_reg st_src_reg_for_int(int val);
   st_src_reg st_src_reg_for_type(int type, int val);

   /**
    * \name Visit methods
    *
    * As typical for the visitor pattern, there must be one \c visit method for
    * each concrete subclass of \c ir_instruction.  Virtual base classes within
    * the hierarchy should not have \c visit methods.
    */
   /*@{*/
   virtual void visit(ir_variable *);
   virtual void visit(ir_loop *);
   virtual void visit(ir_loop_jump *);
   virtual void visit(ir_function_signature *);
   virtual void visit(ir_function *);
   virtual void visit(ir_expression *);
   virtual void visit(ir_swizzle *);
   virtual void visit(ir_dereference_variable *);
   virtual void visit(ir_dereference_array *);
   virtual void visit(ir_dereference_record *);
   virtual void visit(ir_assignment *);
   virtual void visit(ir_constant *);
   virtual void visit(ir_call *);
   virtual void visit(ir_return *);
   virtual void visit(ir_discard *);
   virtual void visit(ir_texture *);
   virtual void visit(ir_if *);
   /*@}*/

   /** Register holding the value of the most recently visited rvalue. */
   st_src_reg result;

   /** List of variable_storage */
   exec_list variables;

   /** List of function_entry */
   exec_list function_signatures;
   int next_signature_id;

   /** List of glsl_to_tgsi_instruction */
   exec_list instructions;

   /** Emit an instruction with no destination and no sources. */
   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op);

   /** Emit an instruction with one source operand. */
   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
                                  st_dst_reg dst, st_src_reg src0);

   /** Emit an instruction with two source operands. */
   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
                                  st_dst_reg dst, st_src_reg src0, st_src_reg src1);

   /** Emit an instruction with three source operands. */
   glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
                                  st_dst_reg dst,
                                  st_src_reg src0, st_src_reg src1, st_src_reg src2);

   /** Pick the float/int/uint variant of \c op based on the operand types. */
   unsigned get_opcode(ir_instruction *ir, unsigned op,
                       st_dst_reg dst,
                       st_src_reg src0, st_src_reg src1);

   /**
    * Emit the correct dot-product instruction for the type of arguments
    */
   void emit_dp(ir_instruction *ir,
                st_dst_reg dst,
                st_src_reg src0,
                st_src_reg src1,
                unsigned elements);

   void emit_scalar(ir_instruction *ir, unsigned op,
                    st_dst_reg dst, st_src_reg src0);

   void emit_scalar(ir_instruction *ir, unsigned op,
                    st_dst_reg dst, st_src_reg src0, st_src_reg src1);

   void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);

   void emit_scs(ir_instruction *ir, unsigned op,
                 st_dst_reg dst, const st_src_reg &src);

   GLboolean try_emit_mad(ir_expression *ir,
                          int mul_operand);
   GLboolean try_emit_sat(ir_expression *ir);

   void emit_swz(ir_expression *ir);

   bool process_move_condition(ir_rvalue *ir);

   void remove_output_reads(gl_register_file type);
   void simplify_cmp(void);

   void rename_temp_register(int index, int new_index);
   int get_first_temp_read(int index);
   int get_first_temp_write(int index);
   int get_last_temp_read(int index);
   int get_last_temp_write(int index);

   void copy_propagate(void);
   void eliminate_dead_code(void);
   int eliminate_dead_code_advanced(void);
   void merge_registers(void);
   void renumber_registers(void);

   /** ralloc context that instructions and variable_storage are allocated from. */
   void *mem_ctx;
};
398
/** Placeholder passed by the emit() overloads for unused source operands. */
static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);

/* Placeholder destination for instructions that write no register.
 *
 * NOTE(review): the second constructor argument is a writemask, yet a
 * swizzle constant (SWIZZLE_NOOP) is passed here -- confirm this is intended.
 */
static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);

/** Destination used when emit() loads the address register via emit_arl(). */
static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT);
404
405 static void
406 fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
407
408 static void
409 fail_link(struct gl_shader_program *prog, const char *fmt, ...)
410 {
411 va_list args;
412 va_start(args, fmt);
413 ralloc_vasprintf_append(&prog->InfoLog, fmt, args);
414 va_end(args);
415
416 prog->LinkStatus = GL_FALSE;
417 }
418
419 static int
420 swizzle_for_size(int size)
421 {
422 int size_swizzles[4] = {
423 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
424 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
425 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
426 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
427 };
428
429 assert((size >= 1) && (size <= 4));
430 return size_swizzles[size - 1];
431 }
432
433 static bool
434 is_tex_instruction(unsigned opcode)
435 {
436 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
437 return info->is_tex;
438 }
439
440 static unsigned
441 num_inst_dst_regs(unsigned opcode)
442 {
443 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
444 return info->num_dst;
445 }
446
447 static unsigned
448 num_inst_src_regs(unsigned opcode)
449 {
450 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
451 return info->is_tex ? info->num_src - 1 : info->num_src;
452 }
453
454 glsl_to_tgsi_instruction *
455 glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
456 st_dst_reg dst,
457 st_src_reg src0, st_src_reg src1, st_src_reg src2)
458 {
459 glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
460 int num_reladdr = 0, i;
461
462 op = get_opcode(ir, op, dst, src0, src1);
463
464 /* If we have to do relative addressing, we want to load the ARL
465 * reg directly for one of the regs, and preload the other reladdr
466 * sources into temps.
467 */
468 num_reladdr += dst.reladdr != NULL;
469 num_reladdr += src0.reladdr != NULL;
470 num_reladdr += src1.reladdr != NULL;
471 num_reladdr += src2.reladdr != NULL;
472
473 reladdr_to_temp(ir, &src2, &num_reladdr);
474 reladdr_to_temp(ir, &src1, &num_reladdr);
475 reladdr_to_temp(ir, &src0, &num_reladdr);
476
477 if (dst.reladdr) {
478 emit_arl(ir, address_reg, *dst.reladdr);
479 num_reladdr--;
480 }
481 assert(num_reladdr == 0);
482
483 inst->op = op;
484 inst->dst = dst;
485 inst->src[0] = src0;
486 inst->src[1] = src1;
487 inst->src[2] = src2;
488 inst->ir = ir;
489 inst->dead_mask = 0;
490
491 inst->function = NULL;
492
493 if (op == TGSI_OPCODE_ARL)
494 this->num_address_regs = 1;
495
496 /* Update indirect addressing status used by TGSI */
497 if (dst.reladdr) {
498 switch(dst.file) {
499 case PROGRAM_TEMPORARY:
500 this->indirect_addr_temps = true;
501 break;
502 case PROGRAM_LOCAL_PARAM:
503 case PROGRAM_ENV_PARAM:
504 case PROGRAM_STATE_VAR:
505 case PROGRAM_NAMED_PARAM:
506 case PROGRAM_CONSTANT:
507 case PROGRAM_UNIFORM:
508 this->indirect_addr_consts = true;
509 break;
510 case PROGRAM_IMMEDIATE:
511 assert(!"immediates should not have indirect addressing");
512 break;
513 default:
514 break;
515 }
516 }
517 else {
518 for (i=0; i<3; i++) {
519 if(inst->src[i].reladdr) {
520 switch(inst->src[i].file) {
521 case PROGRAM_TEMPORARY:
522 this->indirect_addr_temps = true;
523 break;
524 case PROGRAM_LOCAL_PARAM:
525 case PROGRAM_ENV_PARAM:
526 case PROGRAM_STATE_VAR:
527 case PROGRAM_NAMED_PARAM:
528 case PROGRAM_CONSTANT:
529 case PROGRAM_UNIFORM:
530 this->indirect_addr_consts = true;
531 break;
532 case PROGRAM_IMMEDIATE:
533 assert(!"immediates should not have indirect addressing");
534 break;
535 default:
536 break;
537 }
538 }
539 }
540 }
541
542 this->instructions.push_tail(inst);
543
544 return inst;
545 }
546
547
548 glsl_to_tgsi_instruction *
549 glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
550 st_dst_reg dst, st_src_reg src0, st_src_reg src1)
551 {
552 return emit(ir, op, dst, src0, src1, undef_src);
553 }
554
555 glsl_to_tgsi_instruction *
556 glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
557 st_dst_reg dst, st_src_reg src0)
558 {
559 assert(dst.writemask != 0);
560 return emit(ir, op, dst, src0, undef_src, undef_src);
561 }
562
563 glsl_to_tgsi_instruction *
564 glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op)
565 {
566 return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
567 }
568
569 /**
570 * Determines whether to use an integer, unsigned integer, or float opcode
571 * based on the operands and input opcode, then emits the result.
572 *
573 * TODO: type checking for remaining TGSI opcodes
574 */
575 unsigned
576 glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
577 st_dst_reg dst,
578 st_src_reg src0, st_src_reg src1)
579 {
580 int type = GLSL_TYPE_FLOAT;
581
582 if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
583 type = GLSL_TYPE_FLOAT;
584 else if (glsl_version >= 130)
585 type = src0.type;
586
587 #define case4(c, f, i, u) \
588 case TGSI_OPCODE_##c: \
589 if (type == GLSL_TYPE_INT) op = TGSI_OPCODE_##i; \
590 else if (type == GLSL_TYPE_UINT) op = TGSI_OPCODE_##u; \
591 else op = TGSI_OPCODE_##f; \
592 break;
593 #define case3(f, i, u) case4(f, f, i, u)
594 #define case2fi(f, i) case4(f, f, i, i)
595 #define case2iu(i, u) case4(i, LAST, i, u)
596
597 switch(op) {
598 case2fi(ADD, UADD);
599 case2fi(MUL, UMUL);
600 case2fi(MAD, UMAD);
601 case3(DIV, IDIV, UDIV);
602 case3(MAX, IMAX, UMAX);
603 case3(MIN, IMIN, UMIN);
604 case2iu(MOD, UMOD);
605
606 case2fi(SEQ, USEQ);
607 case2fi(SNE, USNE);
608 case3(SGE, ISGE, USGE);
609 case3(SLT, ISLT, USLT);
610
611 case2iu(SHL, SHL);
612 case2iu(ISHR, USHR);
613 case2iu(NOT, NOT);
614 case2iu(AND, AND);
615 case2iu(OR, OR);
616 case2iu(XOR, XOR);
617
618 default: break;
619 }
620
621 assert(op != TGSI_OPCODE_LAST);
622 return op;
623 }
624
625 void
626 glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
627 st_dst_reg dst, st_src_reg src0, st_src_reg src1,
628 unsigned elements)
629 {
630 static const unsigned dot_opcodes[] = {
631 TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
632 };
633
634 emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
635 }
636
637 /**
638 * Emits TGSI scalar opcodes to produce unique answers across channels.
639 *
640 * Some TGSI opcodes are scalar-only, like ARB_fp/vp. The src X
641 * channel determines the result across all channels. So to do a vec4
642 * of this operation, we want to emit a scalar per source channel used
643 * to produce dest channels.
644 */
645 void
646 glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
647 st_dst_reg dst,
648 st_src_reg orig_src0, st_src_reg orig_src1)
649 {
650 int i, j;
651 int done_mask = ~dst.writemask;
652
653 /* TGSI RCP is a scalar operation splatting results to all channels,
654 * like ARB_fp/vp. So emit as many RCPs as necessary to cover our
655 * dst channels.
656 */
657 for (i = 0; i < 4; i++) {
658 GLuint this_mask = (1 << i);
659 glsl_to_tgsi_instruction *inst;
660 st_src_reg src0 = orig_src0;
661 st_src_reg src1 = orig_src1;
662
663 if (done_mask & this_mask)
664 continue;
665
666 GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
667 GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
668 for (j = i + 1; j < 4; j++) {
669 /* If there is another enabled component in the destination that is
670 * derived from the same inputs, generate its value on this pass as
671 * well.
672 */
673 if (!(done_mask & (1 << j)) &&
674 GET_SWZ(src0.swizzle, j) == src0_swiz &&
675 GET_SWZ(src1.swizzle, j) == src1_swiz) {
676 this_mask |= (1 << j);
677 }
678 }
679 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
680 src0_swiz, src0_swiz);
681 src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
682 src1_swiz, src1_swiz);
683
684 inst = emit(ir, op, dst, src0, src1);
685 inst->dst.writemask = this_mask;
686 done_mask |= this_mask;
687 }
688 }
689
690 void
691 glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
692 st_dst_reg dst, st_src_reg src0)
693 {
694 st_src_reg undef = undef_src;
695
696 undef.swizzle = SWIZZLE_XXXX;
697
698 emit_scalar(ir, op, dst, src0, undef);
699 }
700
701 void
702 glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
703 st_dst_reg dst, st_src_reg src0)
704 {
705 st_src_reg tmp = get_temp(glsl_type::float_type);
706
707 if (src0.type == GLSL_TYPE_INT)
708 emit(NULL, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0);
709 else if (src0.type == GLSL_TYPE_UINT)
710 emit(NULL, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0);
711 else
712 tmp = src0;
713
714 emit(NULL, TGSI_OPCODE_ARL, dst, tmp);
715 }
716
717 /**
718 * Emit an TGSI_OPCODE_SCS instruction
719 *
720 * The \c SCS opcode functions a bit differently than the other TGSI opcodes.
721 * Instead of splatting its result across all four components of the
722 * destination, it writes one value to the \c x component and another value to
723 * the \c y component.
724 *
725 * \param ir IR instruction being processed
726 * \param op Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending
727 * on which value is desired.
728 * \param dst Destination register
729 * \param src Source register
730 */
731 void
732 glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op,
733 st_dst_reg dst,
734 const st_src_reg &src)
735 {
736 /* Vertex programs cannot use the SCS opcode.
737 */
738 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) {
739 emit_scalar(ir, op, dst, src);
740 return;
741 }
742
743 const unsigned component = (op == TGSI_OPCODE_SIN) ? 0 : 1;
744 const unsigned scs_mask = (1U << component);
745 int done_mask = ~dst.writemask;
746 st_src_reg tmp;
747
748 assert(op == TGSI_OPCODE_SIN || op == TGSI_OPCODE_COS);
749
750 /* If there are compnents in the destination that differ from the component
751 * that will be written by the SCS instrution, we'll need a temporary.
752 */
753 if (scs_mask != unsigned(dst.writemask)) {
754 tmp = get_temp(glsl_type::vec4_type);
755 }
756
757 for (unsigned i = 0; i < 4; i++) {
758 unsigned this_mask = (1U << i);
759 st_src_reg src0 = src;
760
761 if ((done_mask & this_mask) != 0)
762 continue;
763
764 /* The source swizzle specified which component of the source generates
765 * sine / cosine for the current component in the destination. The SCS
766 * instruction requires that this value be swizzle to the X component.
767 * Replace the current swizzle with a swizzle that puts the source in
768 * the X component.
769 */
770 unsigned src0_swiz = GET_SWZ(src.swizzle, i);
771
772 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
773 src0_swiz, src0_swiz);
774 for (unsigned j = i + 1; j < 4; j++) {
775 /* If there is another enabled component in the destination that is
776 * derived from the same inputs, generate its value on this pass as
777 * well.
778 */
779 if (!(done_mask & (1 << j)) &&
780 GET_SWZ(src0.swizzle, j) == src0_swiz) {
781 this_mask |= (1 << j);
782 }
783 }
784
785 if (this_mask != scs_mask) {
786 glsl_to_tgsi_instruction *inst;
787 st_dst_reg tmp_dst = st_dst_reg(tmp);
788
789 /* Emit the SCS instruction.
790 */
791 inst = emit(ir, TGSI_OPCODE_SCS, tmp_dst, src0);
792 inst->dst.writemask = scs_mask;
793
794 /* Move the result of the SCS instruction to the desired location in
795 * the destination.
796 */
797 tmp.swizzle = MAKE_SWIZZLE4(component, component,
798 component, component);
799 inst = emit(ir, TGSI_OPCODE_SCS, dst, tmp);
800 inst->dst.writemask = this_mask;
801 } else {
802 /* Emit the SCS instruction to write directly to the destination.
803 */
804 glsl_to_tgsi_instruction *inst = emit(ir, TGSI_OPCODE_SCS, dst, src0);
805 inst->dst.writemask = scs_mask;
806 }
807
808 done_mask |= this_mask;
809 }
810 }
811
812 struct st_src_reg
813 glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
814 {
815 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT);
816 union gl_constant_value uval;
817
818 uval.f = val;
819 src.index = _mesa_add_typed_unnamed_constant(this->immediates, &uval, 1,
820 GL_FLOAT, &src.swizzle);
821
822 return src;
823 }
824
825 struct st_src_reg
826 glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
827 {
828 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
829 union gl_constant_value uval;
830
831 assert(glsl_version >= 130);
832
833 uval.i = val;
834 src.index = _mesa_add_typed_unnamed_constant(this->immediates, &uval, 1,
835 GL_INT, &src.swizzle);
836
837 return src;
838 }
839
840 struct st_src_reg
841 glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
842 {
843 if (glsl_version >= 130)
844 return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) :
845 st_src_reg_for_int(val);
846 else
847 return st_src_reg_for_float(val);
848 }
849
850 static int
851 type_size(const struct glsl_type *type)
852 {
853 unsigned int i;
854 int size;
855
856 switch (type->base_type) {
857 case GLSL_TYPE_UINT:
858 case GLSL_TYPE_INT:
859 case GLSL_TYPE_FLOAT:
860 case GLSL_TYPE_BOOL:
861 if (type->is_matrix()) {
862 return type->matrix_columns;
863 } else {
864 /* Regardless of size of vector, it gets a vec4. This is bad
865 * packing for things like floats, but otherwise arrays become a
866 * mess. Hopefully a later pass over the code can pack scalars
867 * down if appropriate.
868 */
869 return 1;
870 }
871 case GLSL_TYPE_ARRAY:
872 assert(type->length > 0);
873 return type_size(type->fields.array) * type->length;
874 case GLSL_TYPE_STRUCT:
875 size = 0;
876 for (i = 0; i < type->length; i++) {
877 size += type_size(type->fields.structure[i].type);
878 }
879 return size;
880 case GLSL_TYPE_SAMPLER:
881 /* Samplers take up one slot in UNIFORMS[], but they're baked in
882 * at link time.
883 */
884 return 1;
885 default:
886 assert(0);
887 return 0;
888 }
889 }
890
891 /**
892 * In the initial pass of codegen, we assign temporary numbers to
893 * intermediate results. (not SSA -- variable assignments will reuse
894 * storage).
895 */
896 st_src_reg
897 glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
898 {
899 st_src_reg src;
900 int swizzle[4];
901 int i;
902
903 src.type = glsl_version >= 130 ? type->base_type : GLSL_TYPE_FLOAT;
904 src.file = PROGRAM_TEMPORARY;
905 src.index = next_temp;
906 src.reladdr = NULL;
907 next_temp += type_size(type);
908
909 if (type->is_array() || type->is_record()) {
910 src.swizzle = SWIZZLE_NOOP;
911 } else {
912 for (i = 0; i < type->vector_elements; i++)
913 swizzle[i] = i;
914 for (; i < 4; i++)
915 swizzle[i] = type->vector_elements - 1;
916 src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1],
917 swizzle[2], swizzle[3]);
918 }
919 src.negate = 0;
920
921 return src;
922 }
923
924 variable_storage *
925 glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
926 {
927
928 variable_storage *entry;
929
930 foreach_iter(exec_list_iterator, iter, this->variables) {
931 entry = (variable_storage *)iter.get();
932
933 if (entry->var == var)
934 return entry;
935 }
936
937 return NULL;
938 }
939
940 void
941 glsl_to_tgsi_visitor::visit(ir_variable *ir)
942 {
943 if (strcmp(ir->name, "gl_FragCoord") == 0) {
944 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
945
946 fp->OriginUpperLeft = ir->origin_upper_left;
947 fp->PixelCenterInteger = ir->pixel_center_integer;
948
949 } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
950 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
951 switch (ir->depth_layout) {
952 case ir_depth_layout_none:
953 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE;
954 break;
955 case ir_depth_layout_any:
956 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY;
957 break;
958 case ir_depth_layout_greater:
959 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER;
960 break;
961 case ir_depth_layout_less:
962 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS;
963 break;
964 case ir_depth_layout_unchanged:
965 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED;
966 break;
967 default:
968 assert(0);
969 break;
970 }
971 }
972
973 if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
974 unsigned int i;
975 const ir_state_slot *const slots = ir->state_slots;
976 assert(ir->state_slots != NULL);
977
978 /* Check if this statevar's setup in the STATE file exactly
979 * matches how we'll want to reference it as a
980 * struct/array/whatever. If not, then we need to move it into
981 * temporary storage and hope that it'll get copy-propagated
982 * out.
983 */
984 for (i = 0; i < ir->num_state_slots; i++) {
985 if (slots[i].swizzle != SWIZZLE_XYZW) {
986 break;
987 }
988 }
989
990 struct variable_storage *storage;
991 st_dst_reg dst;
992 if (i == ir->num_state_slots) {
993 /* We'll set the index later. */
994 storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
995 this->variables.push_tail(storage);
996
997 dst = undef_dst;
998 } else {
999 /* The variable_storage constructor allocates slots based on the size
1000 * of the type. However, this had better match the number of state
1001 * elements that we're going to copy into the new temporary.
1002 */
1003 assert((int) ir->num_state_slots == type_size(ir->type));
1004
1005 storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
1006 this->next_temp);
1007 this->variables.push_tail(storage);
1008 this->next_temp += type_size(ir->type);
1009
1010 dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index,
1011 glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT));
1012 }
1013
1014
1015 for (unsigned int i = 0; i < ir->num_state_slots; i++) {
1016 int index = _mesa_add_state_reference(this->prog->Parameters,
1017 (gl_state_index *)slots[i].tokens);
1018
1019 if (storage->file == PROGRAM_STATE_VAR) {
1020 if (storage->index == -1) {
1021 storage->index = index;
1022 } else {
1023 assert(index == storage->index + (int)i);
1024 }
1025 } else {
1026 st_src_reg src(PROGRAM_STATE_VAR, index,
1027 glsl_version >= 130 ? ir->type->base_type : GLSL_TYPE_FLOAT);
1028 src.swizzle = slots[i].swizzle;
1029 emit(ir, TGSI_OPCODE_MOV, dst, src);
1030 /* even a float takes up a whole vec4 reg in a struct/array. */
1031 dst.index++;
1032 }
1033 }
1034
1035 if (storage->file == PROGRAM_TEMPORARY &&
1036 dst.index != storage->index + (int) ir->num_state_slots) {
1037 fail_link(this->shader_program,
1038 "failed to load builtin uniform `%s' (%d/%d regs loaded)\n",
1039 ir->name, dst.index - storage->index,
1040 type_size(ir->type));
1041 }
1042 }
1043 }
1044
1045 void
1046 glsl_to_tgsi_visitor::visit(ir_loop *ir)
1047 {
1048 ir_dereference_variable *counter = NULL;
1049
1050 if (ir->counter != NULL)
1051 counter = new(ir) ir_dereference_variable(ir->counter);
1052
1053 if (ir->from != NULL) {
1054 assert(ir->counter != NULL);
1055
1056 ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);
1057
1058 a->accept(this);
1059 delete a;
1060 }
1061
1062 emit(NULL, TGSI_OPCODE_BGNLOOP);
1063
1064 if (ir->to) {
1065 ir_expression *e =
1066 new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
1067 counter, ir->to);
1068 ir_if *if_stmt = new(ir) ir_if(e);
1069
1070 ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
1071
1072 if_stmt->then_instructions.push_tail(brk);
1073
1074 if_stmt->accept(this);
1075
1076 delete if_stmt;
1077 delete e;
1078 delete brk;
1079 }
1080
1081 visit_exec_list(&ir->body_instructions, this);
1082
1083 if (ir->increment) {
1084 ir_expression *e =
1085 new(ir) ir_expression(ir_binop_add, counter->type,
1086 counter, ir->increment);
1087
1088 ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);
1089
1090 a->accept(this);
1091 delete a;
1092 delete e;
1093 }
1094
1095 emit(NULL, TGSI_OPCODE_ENDLOOP);
1096 }
1097
1098 void
1099 glsl_to_tgsi_visitor::visit(ir_loop_jump *ir)
1100 {
1101 switch (ir->mode) {
1102 case ir_loop_jump::jump_break:
1103 emit(NULL, TGSI_OPCODE_BRK);
1104 break;
1105 case ir_loop_jump::jump_continue:
1106 emit(NULL, TGSI_OPCODE_CONT);
1107 break;
1108 }
1109 }
1110
1111
1112 void
1113 glsl_to_tgsi_visitor::visit(ir_function_signature *ir)
1114 {
1115 assert(0);
1116 (void)ir;
1117 }
1118
1119 void
1120 glsl_to_tgsi_visitor::visit(ir_function *ir)
1121 {
1122 /* Ignore function bodies other than main() -- we shouldn't see calls to
1123 * them since they should all be inlined before we get to glsl_to_tgsi.
1124 */
1125 if (strcmp(ir->name, "main") == 0) {
1126 const ir_function_signature *sig;
1127 exec_list empty;
1128
1129 sig = ir->matching_signature(&empty);
1130
1131 assert(sig);
1132
1133 foreach_iter(exec_list_iterator, iter, sig->body) {
1134 ir_instruction *ir = (ir_instruction *)iter.get();
1135
1136 ir->accept(this);
1137 }
1138 }
1139 }
1140
1141 GLboolean
1142 glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
1143 {
1144 int nonmul_operand = 1 - mul_operand;
1145 st_src_reg a, b, c;
1146 st_dst_reg result_dst;
1147
1148 ir_expression *expr = ir->operands[mul_operand]->as_expression();
1149 if (!expr || expr->operation != ir_binop_mul)
1150 return false;
1151
1152 expr->operands[0]->accept(this);
1153 a = this->result;
1154 expr->operands[1]->accept(this);
1155 b = this->result;
1156 ir->operands[nonmul_operand]->accept(this);
1157 c = this->result;
1158
1159 this->result = get_temp(ir->type);
1160 result_dst = st_dst_reg(this->result);
1161 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1162 emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c);
1163
1164 return true;
1165 }
1166
1167 GLboolean
1168 glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
1169 {
1170 /* Saturates were only introduced to vertex programs in
1171 * NV_vertex_program3, so don't give them to drivers in the VP.
1172 */
1173 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB)
1174 return false;
1175
1176 ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
1177 if (!sat_src)
1178 return false;
1179
1180 sat_src->accept(this);
1181 st_src_reg src = this->result;
1182
1183 this->result = get_temp(ir->type);
1184 st_dst_reg result_dst = st_dst_reg(this->result);
1185 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1186 glsl_to_tgsi_instruction *inst;
1187 inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src);
1188 inst->saturate = true;
1189
1190 return true;
1191 }
1192
1193 void
1194 glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
1195 st_src_reg *reg, int *num_reladdr)
1196 {
1197 if (!reg->reladdr)
1198 return;
1199
1200 emit_arl(ir, address_reg, *reg->reladdr);
1201
1202 if (*num_reladdr != 1) {
1203 st_src_reg temp = get_temp(glsl_type::vec4_type);
1204
1205 emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg);
1206 *reg = temp;
1207 }
1208
1209 (*num_reladdr)--;
1210 }
1211
1212 void
1213 glsl_to_tgsi_visitor::visit(ir_expression *ir)
1214 {
1215 unsigned int operand;
1216 st_src_reg op[Elements(ir->operands)];
1217 st_src_reg result_src;
1218 st_dst_reg result_dst;
1219
1220 /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c)
1221 */
1222 if (ir->operation == ir_binop_add) {
1223 if (try_emit_mad(ir, 1))
1224 return;
1225 if (try_emit_mad(ir, 0))
1226 return;
1227 }
1228 if (try_emit_sat(ir))
1229 return;
1230
1231 if (ir->operation == ir_quadop_vector)
1232 assert(!"ir_quadop_vector should have been lowered");
1233
1234 for (operand = 0; operand < ir->get_num_operands(); operand++) {
1235 this->result.file = PROGRAM_UNDEFINED;
1236 ir->operands[operand]->accept(this);
1237 if (this->result.file == PROGRAM_UNDEFINED) {
1238 ir_print_visitor v;
1239 printf("Failed to get tree for expression operand:\n");
1240 ir->operands[operand]->accept(&v);
1241 exit(1);
1242 }
1243 op[operand] = this->result;
1244
1245 /* Matrix expression operands should have been broken down to vector
1246 * operations already.
1247 */
1248 assert(!ir->operands[operand]->type->is_matrix());
1249 }
1250
1251 int vector_elements = ir->operands[0]->type->vector_elements;
1252 if (ir->operands[1]) {
1253 vector_elements = MAX2(vector_elements,
1254 ir->operands[1]->type->vector_elements);
1255 }
1256
1257 this->result.file = PROGRAM_UNDEFINED;
1258
1259 /* Storage for our result. Ideally for an assignment we'd be using
1260 * the actual storage for the result here, instead.
1261 */
1262 result_src = get_temp(ir->type);
1263 /* convenience for the emit functions below. */
1264 result_dst = st_dst_reg(result_src);
1265 /* Limit writes to the channels that will be used by result_src later.
1266 * This does limit this temp's use as a temporary for multi-instruction
1267 * sequences.
1268 */
1269 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1270
1271 switch (ir->operation) {
1272 case ir_unop_logic_not:
1273 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0));
1274 break;
1275 case ir_unop_neg:
1276 assert(result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_INT);
1277 if (result_dst.type == GLSL_TYPE_INT)
1278 emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
1279 else {
1280 op[0].negate = ~op[0].negate;
1281 result_src = op[0];
1282 }
1283 break;
1284 case ir_unop_abs:
1285 assert(result_dst.type == GLSL_TYPE_FLOAT);
1286 emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
1287 break;
1288 case ir_unop_sign:
1289 emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]);
1290 break;
1291 case ir_unop_rcp:
1292 emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]);
1293 break;
1294
1295 case ir_unop_exp2:
1296 emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]);
1297 break;
1298 case ir_unop_exp:
1299 case ir_unop_log:
1300 assert(!"not reached: should be handled by ir_explog_to_explog2");
1301 break;
1302 case ir_unop_log2:
1303 emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]);
1304 break;
1305 case ir_unop_sin:
1306 emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
1307 break;
1308 case ir_unop_cos:
1309 emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]);
1310 break;
1311 case ir_unop_sin_reduced:
1312 emit_scs(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
1313 break;
1314 case ir_unop_cos_reduced:
1315 emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]);
1316 break;
1317
1318 case ir_unop_dFdx:
1319 emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]);
1320 break;
1321 case ir_unop_dFdy:
1322 op[0].negate = ~op[0].negate;
1323 emit(ir, TGSI_OPCODE_DDY, result_dst, op[0]);
1324 break;
1325
1326 case ir_unop_noise: {
1327 /* At some point, a motivated person could add a better
1328 * implementation of noise. Currently not even the nvidia
1329 * binary drivers do anything more than this. In any case, the
1330 * place to do this is in the GL state tracker, not the poor
1331 * driver.
1332 */
1333 emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5));
1334 break;
1335 }
1336
1337 case ir_binop_add:
1338 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
1339 break;
1340 case ir_binop_sub:
1341 emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]);
1342 break;
1343
1344 case ir_binop_mul:
1345 emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
1346 break;
1347 case ir_binop_div:
1348 if (result_dst.type == GLSL_TYPE_FLOAT)
1349 assert(!"not reached: should be handled by ir_div_to_mul_rcp");
1350 else
1351 emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
1352 break;
1353 case ir_binop_mod:
1354 if (result_dst.type == GLSL_TYPE_FLOAT)
1355 assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
1356 else
1357 emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]);
1358 break;
1359
1360 case ir_binop_less:
1361 emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
1362 break;
1363 case ir_binop_greater:
1364 emit(ir, TGSI_OPCODE_SGT, result_dst, op[0], op[1]);
1365 break;
1366 case ir_binop_lequal:
1367 emit(ir, TGSI_OPCODE_SLE, result_dst, op[0], op[1]);
1368 break;
1369 case ir_binop_gequal:
1370 emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
1371 break;
1372 case ir_binop_equal:
1373 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
1374 break;
1375 case ir_binop_nequal:
1376 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1377 break;
1378 case ir_binop_all_equal:
1379 /* "==" operator producing a scalar boolean. */
1380 if (ir->operands[0]->type->is_vector() ||
1381 ir->operands[1]->type->is_vector()) {
1382 st_src_reg temp = get_temp(glsl_version >= 130 ?
1383 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
1384 glsl_type::vec4_type);
1385 assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
1386 emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
1387 emit_dp(ir, result_dst, temp, temp, vector_elements);
1388 emit(ir, TGSI_OPCODE_SEQ, result_dst, result_src, st_src_reg_for_float(0.0));
1389 } else {
1390 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
1391 }
1392 break;
1393 case ir_binop_any_nequal:
1394 /* "!=" operator producing a scalar boolean. */
1395 if (ir->operands[0]->type->is_vector() ||
1396 ir->operands[1]->type->is_vector()) {
1397 st_src_reg temp = get_temp(glsl_version >= 130 ?
1398 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
1399 glsl_type::vec4_type);
1400 assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
1401 emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
1402 emit_dp(ir, result_dst, temp, temp, vector_elements);
1403 emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
1404 } else {
1405 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1406 }
1407 break;
1408
1409 case ir_unop_any:
1410 assert(ir->operands[0]->type->is_vector());
1411 emit_dp(ir, result_dst, op[0], op[0],
1412 ir->operands[0]->type->vector_elements);
1413 emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
1414 break;
1415
1416 case ir_binop_logic_xor:
1417 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1418 break;
1419
1420 case ir_binop_logic_or:
1421 /* This could be a saturated add and skip the SNE. */
1422 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
1423 emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_float(0.0));
1424 break;
1425
1426 case ir_binop_logic_and:
1427 /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
1428 emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
1429 break;
1430
1431 case ir_binop_dot:
1432 assert(ir->operands[0]->type->is_vector());
1433 assert(ir->operands[0]->type == ir->operands[1]->type);
1434 emit_dp(ir, result_dst, op[0], op[1],
1435 ir->operands[0]->type->vector_elements);
1436 break;
1437
1438 case ir_unop_sqrt:
1439 /* sqrt(x) = x * rsq(x). */
1440 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
1441 emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]);
1442 /* For incoming channels <= 0, set the result to 0. */
1443 op[0].negate = ~op[0].negate;
1444 emit(ir, TGSI_OPCODE_CMP, result_dst,
1445 op[0], result_src, st_src_reg_for_float(0.0));
1446 break;
1447 case ir_unop_rsq:
1448 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
1449 break;
1450 case ir_unop_i2f:
1451 case ir_unop_b2f:
1452 if (glsl_version >= 130) {
1453 emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
1454 break;
1455 }
1456 case ir_unop_b2i:
1457 /* Booleans are stored as integers (or floats in GLSL 1.20 and lower). */
1458 result_src = op[0];
1459 break;
1460 case ir_unop_f2i:
1461 if (glsl_version >= 130)
1462 emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
1463 else
1464 emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
1465 break;
1466 case ir_unop_f2b:
1467 case ir_unop_i2b:
1468 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0],
1469 st_src_reg_for_type(result_dst.type, 0));
1470 break;
1471 case ir_unop_trunc:
1472 emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
1473 break;
1474 case ir_unop_ceil:
1475 op[0].negate = ~op[0].negate;
1476 emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
1477 result_src.negate = ~result_src.negate;
1478 break;
1479 case ir_unop_floor:
1480 emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
1481 break;
1482 case ir_unop_fract:
1483 emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]);
1484 break;
1485
1486 case ir_binop_min:
1487 emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]);
1488 break;
1489 case ir_binop_max:
1490 emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]);
1491 break;
1492 case ir_binop_pow:
1493 emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]);
1494 break;
1495
1496 case ir_unop_bit_not:
1497 if (glsl_version >= 130) {
1498 emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
1499 break;
1500 }
1501 case ir_unop_u2f:
1502 if (glsl_version >= 130) {
1503 emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
1504 break;
1505 }
1506 case ir_binop_lshift:
1507 if (glsl_version >= 130) {
1508 emit(ir, TGSI_OPCODE_SHL, result_dst, op[0]);
1509 break;
1510 }
1511 case ir_binop_rshift:
1512 if (glsl_version >= 130) {
1513 emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0]);
1514 break;
1515 }
1516 case ir_binop_bit_and:
1517 if (glsl_version >= 130) {
1518 emit(ir, TGSI_OPCODE_AND, result_dst, op[0]);
1519 break;
1520 }
1521 case ir_binop_bit_xor:
1522 if (glsl_version >= 130) {
1523 emit(ir, TGSI_OPCODE_XOR, result_dst, op[0]);
1524 break;
1525 }
1526 case ir_binop_bit_or:
1527 if (glsl_version >= 130) {
1528 emit(ir, TGSI_OPCODE_OR, result_dst, op[0]);
1529 break;
1530 }
1531 case ir_unop_round_even:
1532 assert(!"GLSL 1.30 features unsupported");
1533 break;
1534
1535 case ir_quadop_vector:
1536 /* This operation should have already been handled.
1537 */
1538 assert(!"Should not get here.");
1539 break;
1540 }
1541
1542 this->result = result_src;
1543 }
1544
1545
1546 void
1547 glsl_to_tgsi_visitor::visit(ir_swizzle *ir)
1548 {
1549 st_src_reg src;
1550 int i;
1551 int swizzle[4];
1552
1553 /* Note that this is only swizzles in expressions, not those on the left
1554 * hand side of an assignment, which do write masking. See ir_assignment
1555 * for that.
1556 */
1557
1558 ir->val->accept(this);
1559 src = this->result;
1560 assert(src.file != PROGRAM_UNDEFINED);
1561
1562 for (i = 0; i < 4; i++) {
1563 if (i < ir->type->vector_elements) {
1564 switch (i) {
1565 case 0:
1566 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
1567 break;
1568 case 1:
1569 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
1570 break;
1571 case 2:
1572 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
1573 break;
1574 case 3:
1575 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
1576 break;
1577 }
1578 } else {
1579 /* If the type is smaller than a vec4, replicate the last
1580 * channel out.
1581 */
1582 swizzle[i] = swizzle[ir->type->vector_elements - 1];
1583 }
1584 }
1585
1586 src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1587
1588 this->result = src;
1589 }
1590
1591 void
1592 glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
1593 {
1594 variable_storage *entry = find_variable_storage(ir->var);
1595 ir_variable *var = ir->var;
1596
1597 if (!entry) {
1598 switch (var->mode) {
1599 case ir_var_uniform:
1600 entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
1601 var->location);
1602 this->variables.push_tail(entry);
1603 break;
1604 case ir_var_in:
1605 case ir_var_inout:
1606 /* The linker assigns locations for varyings and attributes,
1607 * including deprecated builtins (like gl_Color), user-assign
1608 * generic attributes (glBindVertexLocation), and
1609 * user-defined varyings.
1610 *
1611 * FINISHME: We would hit this path for function arguments. Fix!
1612 */
1613 assert(var->location != -1);
1614 entry = new(mem_ctx) variable_storage(var,
1615 PROGRAM_INPUT,
1616 var->location);
1617 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
1618 var->location >= VERT_ATTRIB_GENERIC0) {
1619 _mesa_add_attribute(this->prog->Attributes,
1620 var->name,
1621 _mesa_sizeof_glsl_type(var->type->gl_type),
1622 var->type->gl_type,
1623 var->location - VERT_ATTRIB_GENERIC0);
1624 }
1625 break;
1626 case ir_var_out:
1627 assert(var->location != -1);
1628 entry = new(mem_ctx) variable_storage(var,
1629 PROGRAM_OUTPUT,
1630 var->location);
1631 break;
1632 case ir_var_system_value:
1633 entry = new(mem_ctx) variable_storage(var,
1634 PROGRAM_SYSTEM_VALUE,
1635 var->location);
1636 break;
1637 case ir_var_auto:
1638 case ir_var_temporary:
1639 entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY,
1640 this->next_temp);
1641 this->variables.push_tail(entry);
1642
1643 next_temp += type_size(var->type);
1644 break;
1645 }
1646
1647 if (!entry) {
1648 printf("Failed to make storage for %s\n", var->name);
1649 exit(1);
1650 }
1651 }
1652
1653 this->result = st_src_reg(entry->file, entry->index, var->type);
1654 if (glsl_version <= 120)
1655 this->result.type = GLSL_TYPE_FLOAT;
1656 }
1657
1658 void
1659 glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
1660 {
1661 ir_constant *index;
1662 st_src_reg src;
1663 int element_size = type_size(ir->type);
1664
1665 index = ir->array_index->constant_expression_value();
1666
1667 ir->array->accept(this);
1668 src = this->result;
1669
1670 if (index) {
1671 src.index += index->value.i[0] * element_size;
1672 } else {
1673 st_src_reg array_base = this->result;
1674 /* Variable index array dereference. It eats the "vec4" of the
1675 * base of the array and an index that offsets the Mesa register
1676 * index.
1677 */
1678 ir->array_index->accept(this);
1679
1680 st_src_reg index_reg;
1681
1682 if (element_size == 1) {
1683 index_reg = this->result;
1684 } else {
1685 index_reg = get_temp(glsl_type::float_type);
1686
1687 emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg),
1688 this->result, st_src_reg_for_float(element_size));
1689 }
1690
1691 src.reladdr = ralloc(mem_ctx, st_src_reg);
1692 memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1693 }
1694
1695 /* If the type is smaller than a vec4, replicate the last channel out. */
1696 if (ir->type->is_scalar() || ir->type->is_vector())
1697 src.swizzle = swizzle_for_size(ir->type->vector_elements);
1698 else
1699 src.swizzle = SWIZZLE_NOOP;
1700
1701 this->result = src;
1702 }
1703
1704 void
1705 glsl_to_tgsi_visitor::visit(ir_dereference_record *ir)
1706 {
1707 unsigned int i;
1708 const glsl_type *struct_type = ir->record->type;
1709 int offset = 0;
1710
1711 ir->record->accept(this);
1712
1713 for (i = 0; i < struct_type->length; i++) {
1714 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1715 break;
1716 offset += type_size(struct_type->fields.structure[i].type);
1717 }
1718
1719 /* If the type is smaller than a vec4, replicate the last channel out. */
1720 if (ir->type->is_scalar() || ir->type->is_vector())
1721 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1722 else
1723 this->result.swizzle = SWIZZLE_NOOP;
1724
1725 this->result.index += offset;
1726 }
1727
1728 /**
1729 * We want to be careful in assignment setup to hit the actual storage
1730 * instead of potentially using a temporary like we might with the
1731 * ir_dereference handler.
1732 */
1733 static st_dst_reg
1734 get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v)
1735 {
1736 /* The LHS must be a dereference. If the LHS is a variable indexed array
1737 * access of a vector, it must be separated into a series conditional moves
1738 * before reaching this point (see ir_vec_index_to_cond_assign).
1739 */
1740 assert(ir->as_dereference());
1741 ir_dereference_array *deref_array = ir->as_dereference_array();
1742 if (deref_array) {
1743 assert(!deref_array->array->type->is_vector());
1744 }
1745
1746 /* Use the rvalue deref handler for the most part. We'll ignore
1747 * swizzles in it and write swizzles using writemask, though.
1748 */
1749 ir->accept(v);
1750 return st_dst_reg(v->result);
1751 }
1752
1753 /**
1754 * Process the condition of a conditional assignment
1755 *
1756 * Examines the condition of a conditional assignment to generate the optimal
1757 * first operand of a \c CMP instruction. If the condition is a relational
1758 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
1759 * used as the source for the \c CMP instruction. Otherwise the comparison
1760 * is processed to a boolean result, and the boolean result is used as the
1761 * operand to the CMP instruction.
1762 */
1763 bool
1764 glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir)
1765 {
1766 ir_rvalue *src_ir = ir;
1767 bool negate = true;
1768 bool switch_order = false;
1769
1770 ir_expression *const expr = ir->as_expression();
1771 if ((expr != NULL) && (expr->get_num_operands() == 2)) {
1772 bool zero_on_left = false;
1773
1774 if (expr->operands[0]->is_zero()) {
1775 src_ir = expr->operands[1];
1776 zero_on_left = true;
1777 } else if (expr->operands[1]->is_zero()) {
1778 src_ir = expr->operands[0];
1779 zero_on_left = false;
1780 }
1781
1782 /* a is - 0 + - 0 +
1783 * (a < 0) T F F ( a < 0) T F F
1784 * (0 < a) F F T (-a < 0) F F T
1785 * (a <= 0) T T F (-a < 0) F F T (swap order of other operands)
1786 * (0 <= a) F T T ( a < 0) T F F (swap order of other operands)
1787 * (a > 0) F F T (-a < 0) F F T
1788 * (0 > a) T F F ( a < 0) T F F
1789 * (a >= 0) F T T ( a < 0) T F F (swap order of other operands)
1790 * (0 >= a) T T F (-a < 0) F F T (swap order of other operands)
1791 *
1792 * Note that exchanging the order of 0 and 'a' in the comparison simply
1793 * means that the value of 'a' should be negated.
1794 */
1795 if (src_ir != ir) {
1796 switch (expr->operation) {
1797 case ir_binop_less:
1798 switch_order = false;
1799 negate = zero_on_left;
1800 break;
1801
1802 case ir_binop_greater:
1803 switch_order = false;
1804 negate = !zero_on_left;
1805 break;
1806
1807 case ir_binop_lequal:
1808 switch_order = true;
1809 negate = !zero_on_left;
1810 break;
1811
1812 case ir_binop_gequal:
1813 switch_order = true;
1814 negate = zero_on_left;
1815 break;
1816
1817 default:
1818 /* This isn't the right kind of comparison afterall, so make sure
1819 * the whole condition is visited.
1820 */
1821 src_ir = ir;
1822 break;
1823 }
1824 }
1825 }
1826
1827 src_ir->accept(this);
1828
1829 /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
1830 * condition we produced is 0.0 or 1.0. By flipping the sign, we can
1831 * choose which value TGSI_OPCODE_CMP produces without an extra instruction
1832 * computing the condition.
1833 */
1834 if (negate)
1835 this->result.negate = ~this->result.negate;
1836
1837 return switch_order;
1838 }
1839
1840 void
1841 glsl_to_tgsi_visitor::visit(ir_assignment *ir)
1842 {
1843 st_dst_reg l;
1844 st_src_reg r;
1845 int i;
1846
1847 ir->rhs->accept(this);
1848 r = this->result;
1849
1850 l = get_assignment_lhs(ir->lhs, this);
1851
1852 /* FINISHME: This should really set to the correct maximal writemask for each
1853 * FINISHME: component written (in the loops below). This case can only
1854 * FINISHME: occur for matrices, arrays, and structures.
1855 */
1856 if (ir->write_mask == 0) {
1857 assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
1858 l.writemask = WRITEMASK_XYZW;
1859 } else if (ir->lhs->type->is_scalar() &&
1860 ir->lhs->variable_referenced()->mode == ir_var_out) {
1861 /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
1862 * FINISHME: W component of fragment shader output zero, work correctly.
1863 */
1864 l.writemask = WRITEMASK_XYZW;
1865 } else {
1866 int swizzles[4];
1867 int first_enabled_chan = 0;
1868 int rhs_chan = 0;
1869
1870 l.writemask = ir->write_mask;
1871
1872 for (int i = 0; i < 4; i++) {
1873 if (l.writemask & (1 << i)) {
1874 first_enabled_chan = GET_SWZ(r.swizzle, i);
1875 break;
1876 }
1877 }
1878
1879 /* Swizzle a small RHS vector into the channels being written.
1880 *
1881 * glsl ir treats write_mask as dictating how many channels are
1882 * present on the RHS while Mesa IR treats write_mask as just
1883 * showing which channels of the vec4 RHS get written.
1884 */
1885 for (int i = 0; i < 4; i++) {
1886 if (l.writemask & (1 << i))
1887 swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
1888 else
1889 swizzles[i] = first_enabled_chan;
1890 }
1891 r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
1892 swizzles[2], swizzles[3]);
1893 }
1894
1895 assert(l.file != PROGRAM_UNDEFINED);
1896 assert(r.file != PROGRAM_UNDEFINED);
1897
1898 if (ir->condition) {
1899 const bool switch_order = this->process_move_condition(ir->condition);
1900 st_src_reg condition = this->result;
1901
1902 for (i = 0; i < type_size(ir->lhs->type); i++) {
1903 st_src_reg l_src = st_src_reg(l);
1904 l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements);
1905
1906 if (switch_order) {
1907 emit(ir, TGSI_OPCODE_CMP, l, condition, l_src, r);
1908 } else {
1909 emit(ir, TGSI_OPCODE_CMP, l, condition, r, l_src);
1910 }
1911
1912 l.index++;
1913 r.index++;
1914 }
1915 } else if (ir->rhs->as_expression() &&
1916 this->instructions.get_tail() &&
1917 ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir &&
1918 type_size(ir->lhs->type) == 1) {
1919 /* To avoid emitting an extra MOV when assigning an expression to a
1920 * variable, emit the last instruction of the expression again, but
1921 * replace the destination register with the target of the assignment.
1922 * Dead code elimination will remove the original instruction.
1923 */
1924 glsl_to_tgsi_instruction *inst;
1925 inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
1926 emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]);
1927 } else {
1928 for (i = 0; i < type_size(ir->lhs->type); i++) {
1929 emit(ir, TGSI_OPCODE_MOV, l, r);
1930 l.index++;
1931 r.index++;
1932 }
1933 }
1934 }
1935
1936
1937 void
1938 glsl_to_tgsi_visitor::visit(ir_constant *ir)
1939 {
1940 st_src_reg src;
1941 GLfloat stack_vals[4] = { 0 };
1942 gl_constant_value *values = (gl_constant_value *) stack_vals;
1943 GLenum gl_type = GL_NONE;
1944 unsigned int i;
1945 gl_register_file file;
1946 gl_program_parameter_list *param_list;
1947 static int in_array = 0;
1948
1949 file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE;
1950 param_list = in_array ? this->prog->Parameters : this->immediates;
1951
1952 /* Unfortunately, 4 floats is all we can get into
1953 * _mesa_add_typed_unnamed_constant. So, make a temp to store an
1954 * aggregate constant and move each constant value into it. If we
1955 * get lucky, copy propagation will eliminate the extra moves.
1956 */
1957 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
1958 st_src_reg temp_base = get_temp(ir->type);
1959 st_dst_reg temp = st_dst_reg(temp_base);
1960
1961 foreach_iter(exec_list_iterator, iter, ir->components) {
1962 ir_constant *field_value = (ir_constant *)iter.get();
1963 int size = type_size(field_value->type);
1964
1965 assert(size > 0);
1966
1967 field_value->accept(this);
1968 src = this->result;
1969
1970 for (i = 0; i < (unsigned int)size; i++) {
1971 emit(ir, TGSI_OPCODE_MOV, temp, src);
1972
1973 src.index++;
1974 temp.index++;
1975 }
1976 }
1977 this->result = temp_base;
1978 return;
1979 }
1980
1981 if (ir->type->is_array()) {
1982 st_src_reg temp_base = get_temp(ir->type);
1983 st_dst_reg temp = st_dst_reg(temp_base);
1984 int size = type_size(ir->type->fields.array);
1985
1986 assert(size > 0);
1987 in_array++;
1988
1989 for (i = 0; i < ir->type->length; i++) {
1990 ir->array_elements[i]->accept(this);
1991 src = this->result;
1992 for (int j = 0; j < size; j++) {
1993 emit(ir, TGSI_OPCODE_MOV, temp, src);
1994
1995 src.index++;
1996 temp.index++;
1997 }
1998 }
1999 this->result = temp_base;
2000 in_array--;
2001 return;
2002 }
2003
2004 if (ir->type->is_matrix()) {
2005 st_src_reg mat = get_temp(ir->type);
2006 st_dst_reg mat_column = st_dst_reg(mat);
2007
2008 for (i = 0; i < ir->type->matrix_columns; i++) {
2009 assert(ir->type->base_type == GLSL_TYPE_FLOAT);
2010 values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];
2011
2012 src = st_src_reg(file, -1, ir->type->base_type);
2013 src.index = _mesa_add_typed_unnamed_constant(param_list,
2014 values,
2015 ir->type->vector_elements,
2016 GL_FLOAT,
2017 &src.swizzle);
2018 emit(ir, TGSI_OPCODE_MOV, mat_column, src);
2019
2020 mat_column.index++;
2021 }
2022
2023 this->result = mat;
2024 return;
2025 }
2026
2027 switch (ir->type->base_type) {
2028 case GLSL_TYPE_FLOAT:
2029 gl_type = GL_FLOAT;
2030 for (i = 0; i < ir->type->vector_elements; i++) {
2031 values[i].f = ir->value.f[i];
2032 }
2033 break;
2034 case GLSL_TYPE_UINT:
2035 gl_type = glsl_version >= 130 ? GL_UNSIGNED_INT : GL_FLOAT;
2036 for (i = 0; i < ir->type->vector_elements; i++) {
2037 if (glsl_version >= 130)
2038 values[i].u = ir->value.u[i];
2039 else
2040 values[i].f = ir->value.u[i];
2041 }
2042 break;
2043 case GLSL_TYPE_INT:
2044 gl_type = glsl_version >= 130 ? GL_INT : GL_FLOAT;
2045 for (i = 0; i < ir->type->vector_elements; i++) {
2046 if (glsl_version >= 130)
2047 values[i].i = ir->value.i[i];
2048 else
2049 values[i].f = ir->value.i[i];
2050 }
2051 break;
2052 case GLSL_TYPE_BOOL:
2053 gl_type = glsl_version >= 130 ? GL_BOOL : GL_FLOAT;
2054 for (i = 0; i < ir->type->vector_elements; i++) {
2055 if (glsl_version >= 130)
2056 values[i].b = ir->value.b[i];
2057 else
2058 values[i].f = ir->value.b[i];
2059 }
2060 break;
2061 default:
2062 assert(!"Non-float/uint/int/bool constant");
2063 }
2064
2065 this->result = st_src_reg(file, -1, ir->type);
2066 this->result.index = _mesa_add_typed_unnamed_constant(param_list,
2067 values, ir->type->vector_elements, gl_type,
2068 &this->result.swizzle);
2069 }
2070
2071 function_entry *
2072 glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig)
2073 {
2074 function_entry *entry;
2075
2076 foreach_iter(exec_list_iterator, iter, this->function_signatures) {
2077 entry = (function_entry *)iter.get();
2078
2079 if (entry->sig == sig)
2080 return entry;
2081 }
2082
2083 entry = ralloc(mem_ctx, function_entry);
2084 entry->sig = sig;
2085 entry->sig_id = this->next_signature_id++;
2086 entry->bgn_inst = NULL;
2087
2088 /* Allocate storage for all the parameters. */
2089 foreach_iter(exec_list_iterator, iter, sig->parameters) {
2090 ir_variable *param = (ir_variable *)iter.get();
2091 variable_storage *storage;
2092
2093 storage = find_variable_storage(param);
2094 assert(!storage);
2095
2096 storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY,
2097 this->next_temp);
2098 this->variables.push_tail(storage);
2099
2100 this->next_temp += type_size(param->type);
2101 }
2102
2103 if (!sig->return_type->is_void()) {
2104 entry->return_reg = get_temp(sig->return_type);
2105 } else {
2106 entry->return_reg = undef_src;
2107 }
2108
2109 this->function_signatures.push_tail(entry);
2110 return entry;
2111 }
2112
2113 void
2114 glsl_to_tgsi_visitor::visit(ir_call *ir)
2115 {
2116 glsl_to_tgsi_instruction *call_inst;
2117 ir_function_signature *sig = ir->get_callee();
2118 function_entry *entry = get_function_signature(sig);
2119 int i;
2120
2121 /* Process in parameters. */
2122 exec_list_iterator sig_iter = sig->parameters.iterator();
2123 foreach_iter(exec_list_iterator, iter, *ir) {
2124 ir_rvalue *param_rval = (ir_rvalue *)iter.get();
2125 ir_variable *param = (ir_variable *)sig_iter.get();
2126
2127 if (param->mode == ir_var_in ||
2128 param->mode == ir_var_inout) {
2129 variable_storage *storage = find_variable_storage(param);
2130 assert(storage);
2131
2132 param_rval->accept(this);
2133 st_src_reg r = this->result;
2134
2135 st_dst_reg l;
2136 l.file = storage->file;
2137 l.index = storage->index;
2138 l.reladdr = NULL;
2139 l.writemask = WRITEMASK_XYZW;
2140 l.cond_mask = COND_TR;
2141
2142 for (i = 0; i < type_size(param->type); i++) {
2143 emit(ir, TGSI_OPCODE_MOV, l, r);
2144 l.index++;
2145 r.index++;
2146 }
2147 }
2148
2149 sig_iter.next();
2150 }
2151 assert(!sig_iter.has_next());
2152
2153 /* Emit call instruction */
2154 call_inst = emit(ir, TGSI_OPCODE_CAL);
2155 call_inst->function = entry;
2156
2157 /* Process out parameters. */
2158 sig_iter = sig->parameters.iterator();
2159 foreach_iter(exec_list_iterator, iter, *ir) {
2160 ir_rvalue *param_rval = (ir_rvalue *)iter.get();
2161 ir_variable *param = (ir_variable *)sig_iter.get();
2162
2163 if (param->mode == ir_var_out ||
2164 param->mode == ir_var_inout) {
2165 variable_storage *storage = find_variable_storage(param);
2166 assert(storage);
2167
2168 st_src_reg r;
2169 r.file = storage->file;
2170 r.index = storage->index;
2171 r.reladdr = NULL;
2172 r.swizzle = SWIZZLE_NOOP;
2173 r.negate = 0;
2174
2175 param_rval->accept(this);
2176 st_dst_reg l = st_dst_reg(this->result);
2177
2178 for (i = 0; i < type_size(param->type); i++) {
2179 emit(ir, TGSI_OPCODE_MOV, l, r);
2180 l.index++;
2181 r.index++;
2182 }
2183 }
2184
2185 sig_iter.next();
2186 }
2187 assert(!sig_iter.has_next());
2188
2189 /* Process return value. */
2190 this->result = entry->return_reg;
2191 }
2192
2193 void
2194 glsl_to_tgsi_visitor::visit(ir_texture *ir)
2195 {
2196 st_src_reg result_src, coord, lod_info, projector, dx, dy;
2197 st_dst_reg result_dst, coord_dst;
2198 glsl_to_tgsi_instruction *inst = NULL;
2199 unsigned opcode = TGSI_OPCODE_NOP;
2200
2201 ir->coordinate->accept(this);
2202
2203 /* Put our coords in a temp. We'll need to modify them for shadow,
2204 * projection, or LOD, so the only case we'd use it as is is if
2205 * we're doing plain old texturing. Mesa IR optimization should
2206 * handle cleaning up our mess in that case.
2207 */
2208 coord = get_temp(glsl_type::vec4_type);
2209 coord_dst = st_dst_reg(coord);
2210 emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
2211
2212 if (ir->projector) {
2213 ir->projector->accept(this);
2214 projector = this->result;
2215 }
2216
2217 /* Storage for our result. Ideally for an assignment we'd be using
2218 * the actual storage for the result here, instead.
2219 */
2220 result_src = get_temp(glsl_type::vec4_type);
2221 result_dst = st_dst_reg(result_src);
2222
2223 switch (ir->op) {
2224 case ir_tex:
2225 opcode = TGSI_OPCODE_TEX;
2226 break;
2227 case ir_txb:
2228 opcode = TGSI_OPCODE_TXB;
2229 ir->lod_info.bias->accept(this);
2230 lod_info = this->result;
2231 break;
2232 case ir_txl:
2233 opcode = TGSI_OPCODE_TXL;
2234 ir->lod_info.lod->accept(this);
2235 lod_info = this->result;
2236 break;
2237 case ir_txd:
2238 opcode = TGSI_OPCODE_TXD;
2239 ir->lod_info.grad.dPdx->accept(this);
2240 dx = this->result;
2241 ir->lod_info.grad.dPdy->accept(this);
2242 dy = this->result;
2243 break;
2244 case ir_txf: /* TODO: use TGSI_OPCODE_TXF here */
2245 assert(!"GLSL 1.30 features unsupported");
2246 break;
2247 }
2248
2249 if (ir->projector) {
2250 if (opcode == TGSI_OPCODE_TEX) {
2251 /* Slot the projector in as the last component of the coord. */
2252 coord_dst.writemask = WRITEMASK_W;
2253 emit(ir, TGSI_OPCODE_MOV, coord_dst, projector);
2254 coord_dst.writemask = WRITEMASK_XYZW;
2255 opcode = TGSI_OPCODE_TXP;
2256 } else {
2257 st_src_reg coord_w = coord;
2258 coord_w.swizzle = SWIZZLE_WWWW;
2259
2260 /* For the other TEX opcodes there's no projective version
2261 * since the last slot is taken up by LOD info. Do the
2262 * projective divide now.
2263 */
2264 coord_dst.writemask = WRITEMASK_W;
2265 emit(ir, TGSI_OPCODE_RCP, coord_dst, projector);
2266
2267 /* In the case where we have to project the coordinates "by hand,"
2268 * the shadow comparator value must also be projected.
2269 */
2270 st_src_reg tmp_src = coord;
2271 if (ir->shadow_comparitor) {
2272 /* Slot the shadow value in as the second to last component of the
2273 * coord.
2274 */
2275 ir->shadow_comparitor->accept(this);
2276
2277 tmp_src = get_temp(glsl_type::vec4_type);
2278 st_dst_reg tmp_dst = st_dst_reg(tmp_src);
2279
2280 tmp_dst.writemask = WRITEMASK_Z;
2281 emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result);
2282
2283 tmp_dst.writemask = WRITEMASK_XY;
2284 emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord);
2285 }
2286
2287 coord_dst.writemask = WRITEMASK_XYZ;
2288 emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w);
2289
2290 coord_dst.writemask = WRITEMASK_XYZW;
2291 coord.swizzle = SWIZZLE_XYZW;
2292 }
2293 }
2294
2295 /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow
2296 * comparator was put in the correct place (and projected) by the code,
2297 * above, that handles by-hand projection.
2298 */
2299 if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) {
2300 /* Slot the shadow value in as the second to last component of the
2301 * coord.
2302 */
2303 ir->shadow_comparitor->accept(this);
2304 coord_dst.writemask = WRITEMASK_Z;
2305 emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
2306 coord_dst.writemask = WRITEMASK_XYZW;
2307 }
2308
2309 if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB) {
2310 /* TGSI stores LOD or LOD bias in the last channel of the coords. */
2311 coord_dst.writemask = WRITEMASK_W;
2312 emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info);
2313 coord_dst.writemask = WRITEMASK_XYZW;
2314 }
2315
2316 if (opcode == TGSI_OPCODE_TXD)
2317 inst = emit(ir, opcode, result_dst, coord, dx, dy);
2318 else
2319 inst = emit(ir, opcode, result_dst, coord);
2320
2321 if (ir->shadow_comparitor)
2322 inst->tex_shadow = GL_TRUE;
2323
2324 inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler,
2325 this->shader_program,
2326 this->prog);
2327
2328 const glsl_type *sampler_type = ir->sampler->type;
2329
2330 switch (sampler_type->sampler_dimensionality) {
2331 case GLSL_SAMPLER_DIM_1D:
2332 inst->tex_target = (sampler_type->sampler_array)
2333 ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
2334 break;
2335 case GLSL_SAMPLER_DIM_2D:
2336 inst->tex_target = (sampler_type->sampler_array)
2337 ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
2338 break;
2339 case GLSL_SAMPLER_DIM_3D:
2340 inst->tex_target = TEXTURE_3D_INDEX;
2341 break;
2342 case GLSL_SAMPLER_DIM_CUBE:
2343 inst->tex_target = TEXTURE_CUBE_INDEX;
2344 break;
2345 case GLSL_SAMPLER_DIM_RECT:
2346 inst->tex_target = TEXTURE_RECT_INDEX;
2347 break;
2348 case GLSL_SAMPLER_DIM_BUF:
2349 assert(!"FINISHME: Implement ARB_texture_buffer_object");
2350 break;
2351 default:
2352 assert(!"Should not get here.");
2353 }
2354
2355 this->result = result_src;
2356 }
2357
2358 void
2359 glsl_to_tgsi_visitor::visit(ir_return *ir)
2360 {
2361 if (ir->get_value()) {
2362 st_dst_reg l;
2363 int i;
2364
2365 assert(current_function);
2366
2367 ir->get_value()->accept(this);
2368 st_src_reg r = this->result;
2369
2370 l = st_dst_reg(current_function->return_reg);
2371
2372 for (i = 0; i < type_size(current_function->sig->return_type); i++) {
2373 emit(ir, TGSI_OPCODE_MOV, l, r);
2374 l.index++;
2375 r.index++;
2376 }
2377 }
2378
2379 emit(ir, TGSI_OPCODE_RET);
2380 }
2381
2382 void
2383 glsl_to_tgsi_visitor::visit(ir_discard *ir)
2384 {
2385 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
2386
2387 if (ir->condition) {
2388 ir->condition->accept(this);
2389 this->result.negate = ~this->result.negate;
2390 emit(ir, TGSI_OPCODE_KIL, undef_dst, this->result);
2391 } else {
2392 emit(ir, TGSI_OPCODE_KILP);
2393 }
2394
2395 fp->UsesKill = GL_TRUE;
2396 }
2397
2398 void
2399 glsl_to_tgsi_visitor::visit(ir_if *ir)
2400 {
2401 glsl_to_tgsi_instruction *cond_inst, *if_inst, *else_inst = NULL;
2402 glsl_to_tgsi_instruction *prev_inst;
2403
2404 prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
2405
2406 ir->condition->accept(this);
2407 assert(this->result.file != PROGRAM_UNDEFINED);
2408
2409 if (this->options->EmitCondCodes) {
2410 cond_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
2411
2412 /* See if we actually generated any instruction for generating
2413 * the condition. If not, then cook up a move to a temp so we
2414 * have something to set cond_update on.
2415 */
2416 if (cond_inst == prev_inst) {
2417 st_src_reg temp = get_temp(glsl_type::bool_type);
2418 cond_inst = emit(ir->condition, TGSI_OPCODE_MOV, st_dst_reg(temp), result);
2419 }
2420 cond_inst->cond_update = GL_TRUE;
2421
2422 if_inst = emit(ir->condition, TGSI_OPCODE_IF);
2423 if_inst->dst.cond_mask = COND_NE;
2424 } else {
2425 if_inst = emit(ir->condition, TGSI_OPCODE_IF, undef_dst, this->result);
2426 }
2427
2428 this->instructions.push_tail(if_inst);
2429
2430 visit_exec_list(&ir->then_instructions, this);
2431
2432 if (!ir->else_instructions.is_empty()) {
2433 else_inst = emit(ir->condition, TGSI_OPCODE_ELSE);
2434 visit_exec_list(&ir->else_instructions, this);
2435 }
2436
2437 if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF);
2438 }
2439
2440 glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
2441 {
2442 result.file = PROGRAM_UNDEFINED;
2443 next_temp = 1;
2444 next_signature_id = 1;
2445 current_function = NULL;
2446 num_address_regs = 0;
2447 indirect_addr_temps = false;
2448 indirect_addr_consts = false;
2449 immediates = _mesa_new_parameter_list();
2450 mem_ctx = ralloc_context(NULL);
2451 }
2452
2453 glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
2454 {
2455 _mesa_free_parameter_list(immediates);
2456 ralloc_free(mem_ctx);
2457 }
2458
2459 extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v)
2460 {
2461 delete v;
2462 }
2463
2464
2465 /**
2466 * Count resources used by the given gpu program (number of texture
2467 * samplers, etc).
2468 */
2469 static void
2470 count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
2471 {
2472 v->samplers_used = 0;
2473
2474 foreach_iter(exec_list_iterator, iter, v->instructions) {
2475 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
2476
2477 if (is_tex_instruction(inst->op)) {
2478 v->samplers_used |= 1 << inst->sampler;
2479
2480 prog->SamplerTargets[inst->sampler] =
2481 (gl_texture_index)inst->tex_target;
2482 if (inst->tex_shadow) {
2483 prog->ShadowSamplers |= 1 << inst->sampler;
2484 }
2485 }
2486 }
2487
2488 prog->SamplersUsed = v->samplers_used;
2489 _mesa_update_shader_textures_used(prog);
2490 }
2491
2492
2493 /**
2494 * Check if the given vertex/fragment/shader program is within the
2495 * resource limits of the context (number of texture units, etc).
2496 * If any of those checks fail, record a linker error.
2497 *
2498 * XXX more checks are needed...
2499 */
2500 static void
2501 check_resources(const struct gl_context *ctx,
2502 struct gl_shader_program *shader_program,
2503 glsl_to_tgsi_visitor *prog,
2504 struct gl_program *proginfo)
2505 {
2506 switch (proginfo->Target) {
2507 case GL_VERTEX_PROGRAM_ARB:
2508 if (_mesa_bitcount(prog->samplers_used) >
2509 ctx->Const.MaxVertexTextureImageUnits) {
2510 fail_link(shader_program, "Too many vertex shader texture samplers");
2511 }
2512 if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) {
2513 fail_link(shader_program, "Too many vertex shader constants");
2514 }
2515 break;
2516 case MESA_GEOMETRY_PROGRAM:
2517 if (_mesa_bitcount(prog->samplers_used) >
2518 ctx->Const.MaxGeometryTextureImageUnits) {
2519 fail_link(shader_program, "Too many geometry shader texture samplers");
2520 }
2521 if (proginfo->Parameters->NumParameters >
2522 MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) {
2523 fail_link(shader_program, "Too many geometry shader constants");
2524 }
2525 break;
2526 case GL_FRAGMENT_PROGRAM_ARB:
2527 if (_mesa_bitcount(prog->samplers_used) >
2528 ctx->Const.MaxTextureImageUnits) {
2529 fail_link(shader_program, "Too many fragment shader texture samplers");
2530 }
2531 if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) {
2532 fail_link(shader_program, "Too many fragment shader constants");
2533 }
2534 break;
2535 default:
2536 _mesa_problem(ctx, "unexpected program type in check_resources()");
2537 }
2538 }
2539
2540
2541
/* Pairs a uniform with the parameter position chosen for it in the
 * current target, so the uniforms can be sorted by that position.
 */
struct uniform_sort {
   struct gl_uniform *u;
   int pos;
};

/* The shader_program->Uniforms list is almost sorted in increasing
 * uniform->{Frag,Vert}Pos locations, but not quite when there are
 * uniforms shared between targets.  We need to add parameters in
 * increasing order for the targets.
 *
 * qsort() comparator over struct uniform_sort: returns a negative, zero or
 * positive value when a's position is lower than, equal to or higher than
 * b's.  The result is built from two comparisons rather than a subtraction
 * so it cannot overflow, whatever the positions are.
 */
static int
sort_uniforms(const void *a, const void *b)
{
   const struct uniform_sort *u1 = (const struct uniform_sort *)a;
   const struct uniform_sort *u2 = (const struct uniform_sort *)b;

   return (u1->pos > u2->pos) - (u1->pos < u2->pos);
}
2560
2561 /* Add the uniforms to the parameters. The linker chose locations
2562 * in our parameters lists (which weren't created yet), which the
2563 * uniforms code will use to poke values into our parameters list
2564 * when uniforms are updated.
2565 */
2566 static void
2567 add_uniforms_to_parameters_list(struct gl_shader_program *shader_program,
2568 struct gl_shader *shader,
2569 struct gl_program *prog)
2570 {
2571 unsigned int i;
2572 unsigned int next_sampler = 0, num_uniforms = 0;
2573 struct uniform_sort *sorted_uniforms;
2574
2575 sorted_uniforms = ralloc_array(NULL, struct uniform_sort,
2576 shader_program->Uniforms->NumUniforms);
2577
2578 for (i = 0; i < shader_program->Uniforms->NumUniforms; i++) {
2579 struct gl_uniform *uniform = shader_program->Uniforms->Uniforms + i;
2580 int parameter_index = -1;
2581
2582 switch (shader->Type) {
2583 case GL_VERTEX_SHADER:
2584 parameter_index = uniform->VertPos;
2585 break;
2586 case GL_FRAGMENT_SHADER:
2587 parameter_index = uniform->FragPos;
2588 break;
2589 case GL_GEOMETRY_SHADER:
2590 parameter_index = uniform->GeomPos;
2591 break;
2592 }
2593
2594 /* Only add uniforms used in our target. */
2595 if (parameter_index != -1) {
2596 sorted_uniforms[num_uniforms].pos = parameter_index;
2597 sorted_uniforms[num_uniforms].u = uniform;
2598 num_uniforms++;
2599 }
2600 }
2601
2602 qsort(sorted_uniforms, num_uniforms, sizeof(struct uniform_sort),
2603 sort_uniforms);
2604
2605 for (i = 0; i < num_uniforms; i++) {
2606 struct gl_uniform *uniform = sorted_uniforms[i].u;
2607 int parameter_index = sorted_uniforms[i].pos;
2608 const glsl_type *type = uniform->Type;
2609 unsigned int size;
2610
2611 if (type->is_vector() ||
2612 type->is_scalar()) {
2613 size = type->vector_elements;
2614 } else {
2615 size = type_size(type) * 4;
2616 }
2617
2618 gl_register_file file;
2619 if (type->is_sampler() ||
2620 (type->is_array() && type->fields.array->is_sampler())) {
2621 file = PROGRAM_SAMPLER;
2622 } else {
2623 file = PROGRAM_UNIFORM;
2624 }
2625
2626 GLint index = _mesa_lookup_parameter_index(prog->Parameters, -1,
2627 uniform->Name);
2628
2629 if (index < 0) {
2630 index = _mesa_add_parameter(prog->Parameters, file,
2631 uniform->Name, size, type->gl_type,
2632 NULL, NULL, 0x0);
2633
2634 /* Sampler uniform values are stored in prog->SamplerUnits,
2635 * and the entry in that array is selected by this index we
2636 * store in ParameterValues[].
2637 */
2638 if (file == PROGRAM_SAMPLER) {
2639 for (unsigned int j = 0; j < size / 4; j++)
2640 prog->Parameters->ParameterValues[index + j][0].f = next_sampler++;
2641 }
2642
2643 /* The location chosen in the Parameters list here (returned
2644 * from _mesa_add_uniform) has to match what the linker chose.
2645 */
2646 if (index != parameter_index) {
2647 fail_link(shader_program, "Allocation of uniform `%s' to target "
2648 "failed (%d vs %d)\n",
2649 uniform->Name, index, parameter_index);
2650 }
2651 }
2652 }
2653
2654 ralloc_free(sorted_uniforms);
2655 }
2656
2657 static void
2658 set_uniform_initializer(struct gl_context *ctx, void *mem_ctx,
2659 struct gl_shader_program *shader_program,
2660 const char *name, const glsl_type *type,
2661 ir_constant *val)
2662 {
2663 if (type->is_record()) {
2664 ir_constant *field_constant;
2665
2666 field_constant = (ir_constant *)val->components.get_head();
2667
2668 for (unsigned int i = 0; i < type->length; i++) {
2669 const glsl_type *field_type = type->fields.structure[i].type;
2670 const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name,
2671 type->fields.structure[i].name);
2672 set_uniform_initializer(ctx, mem_ctx, shader_program, field_name,
2673 field_type, field_constant);
2674 field_constant = (ir_constant *)field_constant->next;
2675 }
2676 return;
2677 }
2678
2679 int loc = _mesa_get_uniform_location(ctx, shader_program, name);
2680
2681 if (loc == -1) {
2682 fail_link(shader_program,
2683 "Couldn't find uniform for initializer %s\n", name);
2684 return;
2685 }
2686
2687 for (unsigned int i = 0; i < (type->is_array() ? type->length : 1); i++) {
2688 ir_constant *element;
2689 const glsl_type *element_type;
2690 if (type->is_array()) {
2691 element = val->array_elements[i];
2692 element_type = type->fields.array;
2693 } else {
2694 element = val;
2695 element_type = type;
2696 }
2697
2698 void *values;
2699
2700 if (element_type->base_type == GLSL_TYPE_BOOL) {
2701 int *conv = ralloc_array(mem_ctx, int, element_type->components());
2702 for (unsigned int j = 0; j < element_type->components(); j++) {
2703 conv[j] = element->value.b[j];
2704 }
2705 values = (void *)conv;
2706 element_type = glsl_type::get_instance(GLSL_TYPE_INT,
2707 element_type->vector_elements,
2708 1);
2709 } else {
2710 values = &element->value;
2711 }
2712
2713 if (element_type->is_matrix()) {
2714 _mesa_uniform_matrix(ctx, shader_program,
2715 element_type->matrix_columns,
2716 element_type->vector_elements,
2717 loc, 1, GL_FALSE, (GLfloat *)values);
2718 loc += element_type->matrix_columns;
2719 } else {
2720 _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns,
2721 values, element_type->gl_type);
2722 loc += type_size(element_type);
2723 }
2724 }
2725 }
2726
2727 static void
2728 set_uniform_initializers(struct gl_context *ctx,
2729 struct gl_shader_program *shader_program)
2730 {
2731 void *mem_ctx = NULL;
2732
2733 for (unsigned int i = 0; i < MESA_SHADER_TYPES; i++) {
2734 struct gl_shader *shader = shader_program->_LinkedShaders[i];
2735
2736 if (shader == NULL)
2737 continue;
2738
2739 foreach_iter(exec_list_iterator, iter, *shader->ir) {
2740 ir_instruction *ir = (ir_instruction *)iter.get();
2741 ir_variable *var = ir->as_variable();
2742
2743 if (!var || var->mode != ir_var_uniform || !var->constant_value)
2744 continue;
2745
2746 if (!mem_ctx)
2747 mem_ctx = ralloc_context(NULL);
2748
2749 set_uniform_initializer(ctx, mem_ctx, shader_program, var->name,
2750 var->type, var->constant_value);
2751 }
2752 }
2753
2754 ralloc_free(mem_ctx);
2755 }
2756
2757 /*
2758 * Scan/rewrite program to remove reads of custom (output) registers.
2759 * The passed type has to be either PROGRAM_OUTPUT or PROGRAM_VARYING
2760 * (for vertex shaders).
2761 * In GLSL shaders, varying vars can be read and written.
2762 * On some hardware, trying to read an output register causes trouble.
2763 * So, rewrite the program to use a temporary register in this case.
2764 *
2765 * Based on _mesa_remove_output_reads from programopt.c.
2766 */
2767 void
2768 glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
2769 {
2770 GLuint i;
2771 GLint outputMap[VERT_RESULT_MAX];
2772 GLint outputTypes[VERT_RESULT_MAX];
2773 GLuint numVaryingReads = 0;
2774 GLboolean usedTemps[MAX_TEMPS];
2775 GLuint firstTemp = 0;
2776
2777 _mesa_find_used_registers(prog, PROGRAM_TEMPORARY,
2778 usedTemps, MAX_TEMPS);
2779
2780 assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT);
2781 assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING);
2782
2783 for (i = 0; i < VERT_RESULT_MAX; i++)
2784 outputMap[i] = -1;
2785
2786 /* look for instructions which read from varying vars */
2787 foreach_iter(exec_list_iterator, iter, this->instructions) {
2788 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
2789 const GLuint numSrc = num_inst_src_regs(inst->op);
2790 GLuint j;
2791 for (j = 0; j < numSrc; j++) {
2792 if (inst->src[j].file == type) {
2793 /* replace the read with a temp reg */
2794 const GLuint var = inst->src[j].index;
2795 if (outputMap[var] == -1) {
2796 numVaryingReads++;
2797 outputMap[var] = _mesa_find_free_register(usedTemps,
2798 MAX_TEMPS,
2799 firstTemp);
2800 outputTypes[var] = inst->src[j].type;
2801 firstTemp = outputMap[var] + 1;
2802 }
2803 inst->src[j].file = PROGRAM_TEMPORARY;
2804 inst->src[j].index = outputMap[var];
2805 }
2806 }
2807 }
2808
2809 if (numVaryingReads == 0)
2810 return; /* nothing to be done */
2811
2812 /* look for instructions which write to the varying vars identified above */
2813 foreach_iter(exec_list_iterator, iter, this->instructions) {
2814 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
2815 if (inst->dst.file == type && outputMap[inst->dst.index] >= 0) {
2816 /* change inst to write to the temp reg, instead of the varying */
2817 inst->dst.file = PROGRAM_TEMPORARY;
2818 inst->dst.index = outputMap[inst->dst.index];
2819 }
2820 }
2821
2822 /* insert new MOV instructions at the end */
2823 for (i = 0; i < VERT_RESULT_MAX; i++) {
2824 if (outputMap[i] >= 0) {
2825 /* MOV VAR[i], TEMP[tmp]; */
2826 st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i], outputTypes[i]);
2827 st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW, outputTypes[i]);
2828 dst.index = i;
2829 this->emit(NULL, TGSI_OPCODE_MOV, dst, src);
2830 }
2831 }
2832 }
2833
2834 /**
2835 * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
2836 * are read from the given src in this instruction
2837 */
2838 static int
2839 get_src_arg_mask(st_dst_reg dst, st_src_reg src)
2840 {
2841 int read_mask = 0, comp;
2842
2843 /* Now, given the src swizzle and the written channels, find which
2844 * components are actually read
2845 */
2846 for (comp = 0; comp < 4; ++comp) {
2847 const unsigned coord = GET_SWZ(src.swizzle, comp);
2848 ASSERT(coord < 4);
2849 if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W)
2850 read_mask |= 1 << coord;
2851 }
2852
2853 return read_mask;
2854 }
2855
2856 /**
2857 * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
2858 * instruction is the first instruction to write to register T0. There are
2859 * several lowering passes done in GLSL IR (e.g. branches and
2860 * relative addressing) that create a large number of conditional assignments
2861 * that ir_to_mesa converts to CMP instructions like the one mentioned above.
2862 *
2863 * Here is why this conversion is safe:
2864 * CMP T0, T1 T2 T0 can be expanded to:
2865 * if (T1 < 0.0)
2866 * MOV T0, T2;
2867 * else
2868 * MOV T0, T0;
2869 *
2870 * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
2871 * as the original program. If (T1 < 0.0) evaluates to false, executing
2872 * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
2873 * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
2874 * because any instruction that was going to read from T0 after this was going
2875 * to read a garbage value anyway.
2876 */
2877 void
2878 glsl_to_tgsi_visitor::simplify_cmp(void)
2879 {
2880 unsigned tempWrites[MAX_TEMPS];
2881 unsigned outputWrites[MAX_PROGRAM_OUTPUTS];
2882
2883 memset(tempWrites, 0, sizeof(tempWrites));
2884 memset(outputWrites, 0, sizeof(outputWrites));
2885
2886 foreach_iter(exec_list_iterator, iter, this->instructions) {
2887 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
2888 unsigned prevWriteMask = 0;
2889
2890 /* Give up if we encounter relative addressing or flow control. */
2891 if (inst->dst.reladdr ||
2892 tgsi_get_opcode_info(inst->op)->is_branch ||
2893 inst->op == TGSI_OPCODE_BGNSUB ||
2894 inst->op == TGSI_OPCODE_CONT ||
2895 inst->op == TGSI_OPCODE_END ||
2896 inst->op == TGSI_OPCODE_ENDSUB ||
2897 inst->op == TGSI_OPCODE_RET) {
2898 return;
2899 }
2900
2901 if (inst->dst.file == PROGRAM_OUTPUT) {
2902 assert(inst->dst.index < MAX_PROGRAM_OUTPUTS);
2903 prevWriteMask = outputWrites[inst->dst.index];
2904 outputWrites[inst->dst.index] |= inst->dst.writemask;
2905 } else if (inst->dst.file == PROGRAM_TEMPORARY) {
2906 assert(inst->dst.index < MAX_TEMPS);
2907 prevWriteMask = tempWrites[inst->dst.index];
2908 tempWrites[inst->dst.index] |= inst->dst.writemask;
2909 }
2910
2911 /* For a CMP to be considered a conditional write, the destination
2912 * register and source register two must be the same. */
2913 if (inst->op == TGSI_OPCODE_CMP
2914 && !(inst->dst.writemask & prevWriteMask)
2915 && inst->src[2].file == inst->dst.file
2916 && inst->src[2].index == inst->dst.index
2917 && inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) {
2918
2919 inst->op = TGSI_OPCODE_MOV;
2920 inst->src[0] = inst->src[1];
2921 }
2922 }
2923 }
2924
2925 /* Replaces all references to a temporary register index with another index. */
2926 void
2927 glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
2928 {
2929 foreach_iter(exec_list_iterator, iter, this->instructions) {
2930 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
2931 unsigned j;
2932
2933 for (j=0; j < num_inst_src_regs(inst->op); j++) {
2934 if (inst->src[j].file == PROGRAM_TEMPORARY &&
2935 inst->src[j].index == index) {
2936 inst->src[j].index = new_index;
2937 }
2938 }
2939
2940 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
2941 inst->dst.index = new_index;
2942 }
2943 }
2944 }
2945
2946 int
2947 glsl_to_tgsi_visitor::get_first_temp_read(int index)
2948 {
2949 int depth = 0; /* loop depth */
2950 int loop_start = -1; /* index of the first active BGNLOOP (if any) */
2951 unsigned i = 0, j;
2952
2953 foreach_iter(exec_list_iterator, iter, this->instructions) {
2954 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
2955
2956 for (j=0; j < num_inst_src_regs(inst->op); j++) {
2957 if (inst->src[j].file == PROGRAM_TEMPORARY &&
2958 inst->src[j].index == index) {
2959 return (depth == 0) ? i : loop_start;
2960 }
2961 }
2962
2963 if (inst->op == TGSI_OPCODE_BGNLOOP) {
2964 if(depth++ == 0)
2965 loop_start = i;
2966 } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
2967 if (--depth == 0)
2968 loop_start = -1;
2969 }
2970 assert(depth >= 0);
2971
2972 i++;
2973 }
2974
2975 return -1;
2976 }
2977
2978 int
2979 glsl_to_tgsi_visitor::get_first_temp_write(int index)
2980 {
2981 int depth = 0; /* loop depth */
2982 int loop_start = -1; /* index of the first active BGNLOOP (if any) */
2983 int i = 0;
2984
2985 foreach_iter(exec_list_iterator, iter, this->instructions) {
2986 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
2987
2988 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
2989 return (depth == 0) ? i : loop_start;
2990 }
2991
2992 if (inst->op == TGSI_OPCODE_BGNLOOP) {
2993 if(depth++ == 0)
2994 loop_start = i;
2995 } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
2996 if (--depth == 0)
2997 loop_start = -1;
2998 }
2999 assert(depth >= 0);
3000
3001 i++;
3002 }
3003
3004 return -1;
3005 }
3006
3007 int
3008 glsl_to_tgsi_visitor::get_last_temp_read(int index)
3009 {
3010 int depth = 0; /* loop depth */
3011 int last = -1; /* index of last instruction that reads the temporary */
3012 unsigned i = 0, j;
3013
3014 foreach_iter(exec_list_iterator, iter, this->instructions) {
3015 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3016
3017 for (j=0; j < num_inst_src_regs(inst->op); j++) {
3018 if (inst->src[j].file == PROGRAM_TEMPORARY &&
3019 inst->src[j].index == index) {
3020 last = (depth == 0) ? i : -2;
3021 }
3022 }
3023
3024 if (inst->op == TGSI_OPCODE_BGNLOOP)
3025 depth++;
3026 else if (inst->op == TGSI_OPCODE_ENDLOOP)
3027 if (--depth == 0 && last == -2)
3028 last = i;
3029 assert(depth >= 0);
3030
3031 i++;
3032 }
3033
3034 assert(last >= -1);
3035 return last;
3036 }
3037
3038 int
3039 glsl_to_tgsi_visitor::get_last_temp_write(int index)
3040 {
3041 int depth = 0; /* loop depth */
3042 int last = -1; /* index of last instruction that writes to the temporary */
3043 int i = 0;
3044
3045 foreach_iter(exec_list_iterator, iter, this->instructions) {
3046 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3047
3048 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index)
3049 last = (depth == 0) ? i : -2;
3050
3051 if (inst->op == TGSI_OPCODE_BGNLOOP)
3052 depth++;
3053 else if (inst->op == TGSI_OPCODE_ENDLOOP)
3054 if (--depth == 0 && last == -2)
3055 last = i;
3056 assert(depth >= 0);
3057
3058 i++;
3059 }
3060
3061 assert(last >= -1);
3062 return last;
3063 }
3064
3065 /*
3066 * On a basic block basis, tracks available PROGRAM_TEMPORARY register
3067 * channels for copy propagation and updates following instructions to
3068 * use the original versions.
3069 *
3070 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
3071 * will occur. As an example, a TXP production before this pass:
3072 *
3073 * 0: MOV TEMP[1], INPUT[4].xyyy;
3074 * 1: MOV TEMP[1].w, INPUT[4].wwww;
3075 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
3076 *
3077 * and after:
3078 *
3079 * 0: MOV TEMP[1], INPUT[4].xyyy;
3080 * 1: MOV TEMP[1].w, INPUT[4].wwww;
3081 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
3082 *
3083 * which allows for dead code elimination on TEMP[1]'s writes.
3084 */
3085 void
3086 glsl_to_tgsi_visitor::copy_propagate(void)
3087 {
3088 glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
3089 glsl_to_tgsi_instruction *,
3090 this->next_temp * 4);
3091 int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
3092 int level = 0;
3093
3094 foreach_iter(exec_list_iterator, iter, this->instructions) {
3095 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3096
3097 assert(inst->dst.file != PROGRAM_TEMPORARY
3098 || inst->dst.index < this->next_temp);
3099
3100 /* First, do any copy propagation possible into the src regs. */
3101 for (int r = 0; r < 3; r++) {
3102 glsl_to_tgsi_instruction *first = NULL;
3103 bool good = true;
3104 int acp_base = inst->src[r].index * 4;
3105
3106 if (inst->src[r].file != PROGRAM_TEMPORARY ||
3107 inst->src[r].reladdr)
3108 continue;
3109
3110 /* See if we can find entries in the ACP consisting of MOVs
3111 * from the same src register for all the swizzled channels
3112 * of this src register reference.
3113 */
3114 for (int i = 0; i < 4; i++) {
3115 int src_chan = GET_SWZ(inst->src[r].swizzle, i);
3116 glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];
3117
3118 if (!copy_chan) {
3119 good = false;
3120 break;
3121 }
3122
3123 assert(acp_level[acp_base + src_chan] <= level);
3124
3125 if (!first) {
3126 first = copy_chan;
3127 } else {
3128 if (first->src[0].file != copy_chan->src[0].file ||
3129 first->src[0].index != copy_chan->src[0].index) {
3130 good = false;
3131 break;
3132 }
3133 }
3134 }
3135
3136 if (good) {
3137 /* We've now validated that we can copy-propagate to
3138 * replace this src register reference. Do it.
3139 */
3140 inst->src[r].file = first->src[0].file;
3141 inst->src[r].index = first->src[0].index;
3142
3143 int swizzle = 0;
3144 for (int i = 0; i < 4; i++) {
3145 int src_chan = GET_SWZ(inst->src[r].swizzle, i);
3146 glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan];
3147 swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
3148 (3 * i));
3149 }
3150 inst->src[r].swizzle = swizzle;
3151 }
3152 }
3153
3154 switch (inst->op) {
3155 case TGSI_OPCODE_BGNLOOP:
3156 case TGSI_OPCODE_ENDLOOP:
3157 /* End of a basic block, clear the ACP entirely. */
3158 memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
3159 break;
3160
3161 case TGSI_OPCODE_IF:
3162 ++level;
3163 break;
3164
3165 case TGSI_OPCODE_ENDIF:
3166 case TGSI_OPCODE_ELSE:
3167 /* Clear all channels written inside the block from the ACP, but
3168 * leaving those that were not touched.
3169 */
3170 for (int r = 0; r < this->next_temp; r++) {
3171 for (int c = 0; c < 4; c++) {
3172 if (!acp[4 * r + c])
3173 continue;
3174
3175 if (acp_level[4 * r + c] >= level)
3176 acp[4 * r + c] = NULL;
3177 }
3178 }
3179 if (inst->op == TGSI_OPCODE_ENDIF)
3180 --level;
3181 break;
3182
3183 default:
3184 /* Continuing the block, clear any written channels from
3185 * the ACP.
3186 */
3187 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
3188 /* Any temporary might be written, so no copy propagation
3189 * across this instruction.
3190 */
3191 memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
3192 } else if (inst->dst.file == PROGRAM_OUTPUT &&
3193 inst->dst.reladdr) {
3194 /* Any output might be written, so no copy propagation
3195 * from outputs across this instruction.
3196 */
3197 for (int r = 0; r < this->next_temp; r++) {
3198 for (int c = 0; c < 4; c++) {
3199 if (!acp[4 * r + c])
3200 continue;
3201
3202 if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
3203 acp[4 * r + c] = NULL;
3204 }
3205 }
3206 } else if (inst->dst.file == PROGRAM_TEMPORARY ||
3207 inst->dst.file == PROGRAM_OUTPUT) {
3208 /* Clear where it's used as dst. */
3209 if (inst->dst.file == PROGRAM_TEMPORARY) {
3210 for (int c = 0; c < 4; c++) {
3211 if (inst->dst.writemask & (1 << c)) {
3212 acp[4 * inst->dst.index + c] = NULL;
3213 }
3214 }
3215 }
3216
3217 /* Clear where it's used as src. */
3218 for (int r = 0; r < this->next_temp; r++) {
3219 for (int c = 0; c < 4; c++) {
3220 if (!acp[4 * r + c])
3221 continue;
3222
3223 int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);
3224
3225 if (acp[4 * r + c]->src[0].file == inst->dst.file &&
3226 acp[4 * r + c]->src[0].index == inst->dst.index &&
3227 inst->dst.writemask & (1 << src_chan))
3228 {
3229 acp[4 * r + c] = NULL;
3230 }
3231 }
3232 }
3233 }
3234 break;
3235 }
3236
3237 /* If this is a copy, add it to the ACP. */
3238 if (inst->op == TGSI_OPCODE_MOV &&
3239 inst->dst.file == PROGRAM_TEMPORARY &&
3240 !inst->dst.reladdr &&
3241 !inst->saturate &&
3242 !inst->src[0].reladdr &&
3243 !inst->src[0].negate) {
3244 for (int i = 0; i < 4; i++) {
3245 if (inst->dst.writemask & (1 << i)) {
3246 acp[4 * inst->dst.index + i] = inst;
3247 acp_level[4 * inst->dst.index + i] = level;
3248 }
3249 }
3250 }
3251 }
3252
3253 ralloc_free(acp_level);
3254 ralloc_free(acp);
3255 }
3256
3257 /*
3258 * Tracks available PROGRAM_TEMPORARY registers for dead code elimination.
3259 *
3260 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
3261 * will occur. As an example, a TXP production after copy propagation but
3262 * before this pass:
3263 *
3264 * 0: MOV TEMP[1], INPUT[4].xyyy;
3265 * 1: MOV TEMP[1].w, INPUT[4].wwww;
3266 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
3267 *
3268 * and after this pass:
3269 *
3270 * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
3271 *
3272 * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB)
3273 * FIXME: doesn't eliminate all dead code inside of loops; it steps around them
3274 */
3275 void
3276 glsl_to_tgsi_visitor::eliminate_dead_code(void)
3277 {
3278 int i;
3279
3280 for (i=0; i < this->next_temp; i++) {
3281 int last_read = get_last_temp_read(i);
3282 int j = 0;
3283
3284 foreach_iter(exec_list_iterator, iter, this->instructions) {
3285 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3286
3287 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i &&
3288 j > last_read)
3289 {
3290 iter.remove();
3291 delete inst;
3292 }
3293
3294 j++;
3295 }
3296 }
3297 }
3298
3299 /*
3300 * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead
3301 * code elimination. This is less primitive than eliminate_dead_code(), as it
3302 * is per-channel and can detect consecutive writes without a read between them
3303 * as dead code. However, there is some dead code that can be eliminated by
3304 * eliminate_dead_code() but not this function - for example, this function
3305 * cannot eliminate an instruction writing to a register that is never read and
3306 * is the only instruction writing to that register.
3307 *
3308 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
3309 * will occur.
3310 */
3311 int
3312 glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
3313 {
3314 glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
3315 glsl_to_tgsi_instruction *,
3316 this->next_temp * 4);
3317 int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
3318 int level = 0;
3319 int removed = 0;
3320
3321 foreach_iter(exec_list_iterator, iter, this->instructions) {
3322 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3323
3324 assert(inst->dst.file != PROGRAM_TEMPORARY
3325 || inst->dst.index < this->next_temp);
3326
3327 switch (inst->op) {
3328 case TGSI_OPCODE_BGNLOOP:
3329 case TGSI_OPCODE_ENDLOOP:
3330 /* End of a basic block, clear the write array entirely.
3331 * FIXME: This keeps us from killing dead code when the writes are
3332 * on either side of a loop, even when the register isn't touched
3333 * inside the loop.
3334 */
3335 memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
3336 break;
3337
3338 case TGSI_OPCODE_ENDIF:
3339 --level;
3340 break;
3341
3342 case TGSI_OPCODE_ELSE:
3343 /* Clear all channels written inside the preceding if block from the
3344 * write array, but leave those that were not touched.
3345 *
3346 * FIXME: This destroys opportunities to remove dead code inside of
3347 * IF blocks that are followed by an ELSE block.
3348 */
3349 for (int r = 0; r < this->next_temp; r++) {
3350 for (int c = 0; c < 4; c++) {
3351 if (!writes[4 * r + c])
3352 continue;
3353
3354 if (write_level[4 * r + c] >= level)
3355 writes[4 * r + c] = NULL;
3356 }
3357 }
3358 break;
3359
3360 case TGSI_OPCODE_IF:
3361 ++level;
3362 /* fallthrough to default case to mark the condition as read */
3363
3364 default:
3365 /* Continuing the block, clear any channels from the write array that
3366 * are read by this instruction.
3367 */
3368 for (int i = 0; i < 4; i++) {
3369 if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){
3370 /* Any temporary might be read, so no dead code elimination
3371 * across this instruction.
3372 */
3373 memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
3374 } else if (inst->src[i].file == PROGRAM_TEMPORARY) {
3375 /* Clear where it's used as src. */
3376 int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0);
3377 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1);
3378 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2);
3379 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3);
3380
3381 for (int c = 0; c < 4; c++) {
3382 if (src_chans & (1 << c)) {
3383 writes[4 * inst->src[i].index + c] = NULL;
3384 }
3385 }
3386 }
3387 }
3388 break;
3389 }
3390
3391 /* If this instruction writes to a temporary, add it to the write array.
3392 * If there is already an instruction in the write array for one or more
3393 * of the channels, flag that channel write as dead.
3394 */
3395 if (inst->dst.file == PROGRAM_TEMPORARY &&
3396 !inst->dst.reladdr &&
3397 !inst->saturate) {
3398 for (int c = 0; c < 4; c++) {
3399 if (inst->dst.writemask & (1 << c)) {
3400 if (writes[4 * inst->dst.index + c]) {
3401 if (write_level[4 * inst->dst.index + c] < level)
3402 continue;
3403 else
3404 writes[4 * inst->dst.index + c]->dead_mask |= (1 << c);
3405 }
3406 writes[4 * inst->dst.index + c] = inst;
3407 write_level[4 * inst->dst.index + c] = level;
3408 }
3409 }
3410 }
3411 }
3412
3413 /* Anything still in the write array at this point is dead code. */
3414 for (int r = 0; r < this->next_temp; r++) {
3415 for (int c = 0; c < 4; c++) {
3416 glsl_to_tgsi_instruction *inst = writes[4 * r + c];
3417 if (inst)
3418 inst->dead_mask |= (1 << c);
3419 }
3420 }
3421
3422 /* Now actually remove the instructions that are completely dead and update
3423 * the writemask of other instructions with dead channels.
3424 */
3425 foreach_iter(exec_list_iterator, iter, this->instructions) {
3426 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3427
3428 if (!inst->dead_mask || !inst->dst.writemask)
3429 continue;
3430 else if (inst->dead_mask == inst->dst.writemask) {
3431 iter.remove();
3432 delete inst;
3433 removed++;
3434 } else
3435 inst->dst.writemask &= ~(inst->dead_mask);
3436 }
3437
3438 ralloc_free(write_level);
3439 ralloc_free(writes);
3440
3441 return removed;
3442 }
3443
3444 /* Merges temporary registers together where possible to reduce the number of
3445 * registers needed to run a program.
3446 *
3447 * Produces optimal code only after copy propagation and dead code elimination
3448 * have been run. */
3449 void
3450 glsl_to_tgsi_visitor::merge_registers(void)
3451 {
3452 int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp);
3453 int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp);
3454 int i, j;
3455
3456 /* Read the indices of the last read and first write to each temp register
3457 * into an array so that we don't have to traverse the instruction list as
3458 * much. */
3459 for (i=0; i < this->next_temp; i++) {
3460 last_reads[i] = get_last_temp_read(i);
3461 first_writes[i] = get_first_temp_write(i);
3462 }
3463
3464 /* Start looking for registers with non-overlapping usages that can be
3465 * merged together. */
3466 for (i=0; i < this->next_temp; i++) {
3467 /* Don't touch unused registers. */
3468 if (last_reads[i] < 0 || first_writes[i] < 0) continue;
3469
3470 for (j=0; j < this->next_temp; j++) {
3471 /* Don't touch unused registers. */
3472 if (last_reads[j] < 0 || first_writes[j] < 0) continue;
3473
3474 /* We can merge the two registers if the first write to j is after or
3475 * in the same instruction as the last read from i. Note that the
3476 * register at index i will always be used earlier or at the same time
3477 * as the register at index j. */
3478 if (first_writes[i] <= first_writes[j] &&
3479 last_reads[i] <= first_writes[j])
3480 {
3481 rename_temp_register(j, i); /* Replace all references to j with i.*/
3482
3483 /* Update the first_writes and last_reads arrays with the new
3484 * values for the merged register index, and mark the newly unused
3485 * register index as such. */
3486 last_reads[i] = last_reads[j];
3487 first_writes[j] = -1;
3488 last_reads[j] = -1;
3489 }
3490 }
3491 }
3492
3493 ralloc_free(last_reads);
3494 ralloc_free(first_writes);
3495 }
3496
3497 /* Reassign indices to temporary registers by reusing unused indices created
3498 * by optimization passes. */
3499 void
3500 glsl_to_tgsi_visitor::renumber_registers(void)
3501 {
3502 int i = 0;
3503 int new_index = 0;
3504
3505 for (i=0; i < this->next_temp; i++) {
3506 if (get_first_temp_read(i) < 0) continue;
3507 if (i != new_index)
3508 rename_temp_register(i, new_index);
3509 new_index++;
3510 }
3511
3512 this->next_temp = new_index;
3513 }
3514
3515 /**
3516 * Returns a fragment program which implements the current pixel transfer ops.
3517 * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c.
3518 */
3519 extern "C" void
3520 get_pixel_transfer_visitor(struct st_fragment_program *fp,
3521 glsl_to_tgsi_visitor *original,
3522 int scale_and_bias, int pixel_maps)
3523 {
3524 glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
3525 struct st_context *st = st_context(original->ctx);
3526 struct gl_program *prog = &fp->Base.Base;
3527 struct gl_program_parameter_list *params = _mesa_new_parameter_list();
3528 st_src_reg coord, src0;
3529 st_dst_reg dst0;
3530 glsl_to_tgsi_instruction *inst;
3531
3532 /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
3533 v->ctx = original->ctx;
3534 v->prog = prog;
3535 v->glsl_version = original->glsl_version;
3536 v->options = original->options;
3537 v->next_temp = original->next_temp;
3538 v->num_address_regs = original->num_address_regs;
3539 v->samplers_used = prog->SamplersUsed = original->samplers_used;
3540 v->indirect_addr_temps = original->indirect_addr_temps;
3541 v->indirect_addr_consts = original->indirect_addr_consts;
3542 _mesa_free_parameter_list(v->immediates);
3543 v->immediates = _mesa_clone_parameter_list(original->immediates);
3544
3545 /*
3546 * Get initial pixel color from the texture.
3547 * TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
3548 */
3549 coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
3550 src0 = v->get_temp(glsl_type::vec4_type);
3551 dst0 = st_dst_reg(src0);
3552 inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
3553 inst->sampler = 0;
3554 inst->tex_target = TEXTURE_2D_INDEX;
3555
3556 prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0);
3557 prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */
3558 v->samplers_used |= (1 << 0);
3559
3560 if (scale_and_bias) {
3561 static const gl_state_index scale_state[STATE_LENGTH] =
3562 { STATE_INTERNAL, STATE_PT_SCALE,
3563 (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
3564 static const gl_state_index bias_state[STATE_LENGTH] =
3565 { STATE_INTERNAL, STATE_PT_BIAS,
3566 (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
3567 GLint scale_p, bias_p;
3568 st_src_reg scale, bias;
3569
3570 scale_p = _mesa_add_state_reference(params, scale_state);
3571 bias_p = _mesa_add_state_reference(params, bias_state);
3572
3573 /* MAD colorTemp, colorTemp, scale, bias; */
3574 scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT);
3575 bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT);
3576 inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias);
3577 }
3578
3579 if (pixel_maps) {
3580 st_src_reg temp = v->get_temp(glsl_type::vec4_type);
3581 st_dst_reg temp_dst = st_dst_reg(temp);
3582
3583 assert(st->pixel_xfer.pixelmap_texture);
3584
3585 /* With a little effort, we can do four pixel map look-ups with
3586 * two TEX instructions:
3587 */
3588
3589 /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */
3590 temp_dst.writemask = WRITEMASK_XY; /* write R,G */
3591 inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
3592 inst->sampler = 1;
3593 inst->tex_target = TEXTURE_2D_INDEX;
3594
3595 /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */
3596 src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
3597 temp_dst.writemask = WRITEMASK_ZW; /* write B,A */
3598 inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
3599 inst->sampler = 1;
3600 inst->tex_target = TEXTURE_2D_INDEX;
3601
3602 prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */
3603 v->samplers_used |= (1 << 1);
3604
3605 /* MOV colorTemp, temp; */
3606 inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp);
3607 }
3608
3609 /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
3610 * new visitor. */
3611 foreach_iter(exec_list_iterator, iter, original->instructions) {
3612 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3613 st_src_reg src_regs[3];
3614
3615 if (inst->dst.file == PROGRAM_OUTPUT)
3616 prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);
3617
3618 for (int i=0; i<3; i++) {
3619 src_regs[i] = inst->src[i];
3620 if (src_regs[i].file == PROGRAM_INPUT &&
3621 src_regs[i].index == FRAG_ATTRIB_COL0)
3622 {
3623 src_regs[i].file = PROGRAM_TEMPORARY;
3624 src_regs[i].index = src0.index;
3625 }
3626 else if (src_regs[i].file == PROGRAM_INPUT)
3627 prog->InputsRead |= (1 << src_regs[i].index);
3628 }
3629
3630 v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
3631 }
3632
3633 /* Make modifications to fragment program info. */
3634 prog->Parameters = _mesa_combine_parameter_lists(params,
3635 original->prog->Parameters);
3636 prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes);
3637 prog->Varying = _mesa_clone_parameter_list(original->prog->Varying);
3638 _mesa_free_parameter_list(params);
3639 count_resources(v, prog);
3640 fp->glsl_to_tgsi = v;
3641 }
3642
3643 /**
3644 * Make fragment program for glBitmap:
3645 * Sample the texture and kill the fragment if the bit is 0.
3646 * This program will be combined with the user's fragment program.
3647 *
3648 * Based on make_bitmap_fragment_program in st_cb_bitmap.c.
3649 */
3650 extern "C" void
3651 get_bitmap_visitor(struct st_fragment_program *fp,
3652 glsl_to_tgsi_visitor *original, int samplerIndex)
3653 {
3654 glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
3655 struct st_context *st = st_context(original->ctx);
3656 struct gl_program *prog = &fp->Base.Base;
3657 st_src_reg coord, src0;
3658 st_dst_reg dst0;
3659 glsl_to_tgsi_instruction *inst;
3660
3661 /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
3662 v->ctx = original->ctx;
3663 v->prog = prog;
3664 v->glsl_version = original->glsl_version;
3665 v->options = original->options;
3666 v->next_temp = original->next_temp;
3667 v->num_address_regs = original->num_address_regs;
3668 v->samplers_used = prog->SamplersUsed = original->samplers_used;
3669 v->indirect_addr_temps = original->indirect_addr_temps;
3670 v->indirect_addr_consts = original->indirect_addr_consts;
3671 _mesa_free_parameter_list(v->immediates);
3672 v->immediates = _mesa_clone_parameter_list(original->immediates);
3673
3674 /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
3675 coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
3676 src0 = v->get_temp(glsl_type::vec4_type);
3677 dst0 = st_dst_reg(src0);
3678 inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
3679 inst->sampler = samplerIndex;
3680 inst->tex_target = TEXTURE_2D_INDEX;
3681
3682 prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0);
3683 prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */
3684 v->samplers_used |= (1 << samplerIndex);
3685
3686 /* KIL if -tmp0 < 0 # texel=0 -> keep / texel=0 -> discard */
3687 src0.negate = NEGATE_XYZW;
3688 if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
3689 src0.swizzle = SWIZZLE_XXXX;
3690 inst = v->emit(NULL, TGSI_OPCODE_KIL, undef_dst, src0);
3691
3692 /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
3693 * new visitor. */
3694 foreach_iter(exec_list_iterator, iter, original->instructions) {
3695 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3696 st_src_reg src_regs[3];
3697
3698 if (inst->dst.file == PROGRAM_OUTPUT)
3699 prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);
3700
3701 for (int i=0; i<3; i++) {
3702 src_regs[i] = inst->src[i];
3703 if (src_regs[i].file == PROGRAM_INPUT)
3704 prog->InputsRead |= (1 << src_regs[i].index);
3705 }
3706
3707 v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
3708 }
3709
3710 /* Make modifications to fragment program info. */
3711 prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters);
3712 prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes);
3713 prog->Varying = _mesa_clone_parameter_list(original->prog->Varying);
3714 count_resources(v, prog);
3715 fp->glsl_to_tgsi = v;
3716 }
3717
3718 /* ------------------------- TGSI conversion stuff -------------------------- */
/**
 * One pending branch fix-up, created by get_label().
 */
struct label {
   unsigned branch_target; /**< target value handed to get_label() */
   unsigned token;         /**< slot whose address get_label() returns */
};
3723
3724 /**
3725 * Intermediate state used during shader translation.
3726 */
3727 struct st_translate {
3728 struct ureg_program *ureg;
3729
3730 struct ureg_dst temps[MAX_TEMPS];
3731 struct ureg_src *constants;
3732 struct ureg_src *immediates;
3733 struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
3734 struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
3735 struct ureg_dst address[1];
3736 struct ureg_src samplers[PIPE_MAX_SAMPLERS];
3737 struct ureg_src systemValues[SYSTEM_VALUE_MAX];
3738
3739 /* Extra info for handling point size clamping in vertex shader */
3740 struct ureg_dst pointSizeResult; /**< Actual point size output register */
3741 struct ureg_src pointSizeConst; /**< Point size range constant register */
3742 GLint pointSizeOutIndex; /**< Temp point size output register */
3743 GLboolean prevInstWrotePointSize;
3744
3745 const GLuint *inputMapping;
3746 const GLuint *outputMapping;
3747
3748 /* For every instruction that contains a label (eg CALL), keep
3749 * details so that we can go back afterwards and emit the correct
3750 * tgsi instruction number for each label.
3751 */
3752 struct label *labels;
3753 unsigned labels_size;
3754 unsigned labels_count;
3755
3756 /* Keep a record of the tgsi instruction number that each mesa
3757 * instruction starts at, will be used to fix up labels after
3758 * translation.
3759 */
3760 unsigned *insn;
3761 unsigned insn_size;
3762 unsigned insn_count;
3763
3764 unsigned procType; /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */
3765
3766 boolean error;
3767 };
3768
3769 /** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */
3770 static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
3771 TGSI_SEMANTIC_FACE,
3772 TGSI_SEMANTIC_INSTANCEID
3773 };
3774
3775 /**
3776 * Make note of a branch to a label in the TGSI code.
3777 * After we've emitted all instructions, we'll go over the list
3778 * of labels built here and patch the TGSI code with the actual
3779 * location of each label.
3780 */
3781 static unsigned *get_label(struct st_translate *t, unsigned branch_target)
3782 {
3783 unsigned i;
3784
3785 if (t->labels_count + 1 >= t->labels_size) {
3786 t->labels_size = 1 << (util_logbase2(t->labels_size) + 1);
3787 t->labels = (struct label *)realloc(t->labels,
3788 t->labels_size * sizeof(struct label));
3789 if (t->labels == NULL) {
3790 static unsigned dummy;
3791 t->error = TRUE;
3792 return &dummy;
3793 }
3794 }
3795
3796 i = t->labels_count++;
3797 t->labels[i].branch_target = branch_target;
3798 return &t->labels[i].token;
3799 }
3800
3801 /**
3802 * Called prior to emitting the TGSI code for each Mesa instruction.
3803 * Allocate additional space for instructions if needed.
3804 * Update the insn[] array so the next Mesa instruction points to
3805 * the next TGSI instruction.
3806 */
3807 static void set_insn_start(struct st_translate *t, unsigned start)
3808 {
3809 if (t->insn_count + 1 >= t->insn_size) {
3810 t->insn_size = 1 << (util_logbase2(t->insn_size) + 1);
3811 t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0]));
3812 if (t->insn == NULL) {
3813 t->error = TRUE;
3814 return;
3815 }
3816 }
3817
3818 t->insn[t->insn_count++] = start;
3819 }
3820
3821 /**
3822 * Map a glsl_to_tgsi constant/immediate to a TGSI immediate.
3823 */
3824 static struct ureg_src
3825 emit_immediate(struct st_translate *t,
3826 struct gl_program_parameter_list *params,
3827 int index)
3828 {
3829 struct ureg_program *ureg = t->ureg;
3830
3831 switch(params->Parameters[index].DataType)
3832 {
3833 case GL_FLOAT:
3834 case GL_FLOAT_VEC2:
3835 case GL_FLOAT_VEC3:
3836 case GL_FLOAT_VEC4:
3837 return ureg_DECL_immediate(ureg, (float *)params->ParameterValues[index], 4);
3838 case GL_INT:
3839 case GL_INT_VEC2:
3840 case GL_INT_VEC3:
3841 case GL_INT_VEC4:
3842 return ureg_DECL_immediate_int(ureg, (int *)params->ParameterValues[index], 4);
3843 case GL_UNSIGNED_INT:
3844 case GL_UNSIGNED_INT_VEC2:
3845 case GL_UNSIGNED_INT_VEC3:
3846 case GL_UNSIGNED_INT_VEC4:
3847 case GL_BOOL:
3848 case GL_BOOL_VEC2:
3849 case GL_BOOL_VEC3:
3850 case GL_BOOL_VEC4:
3851 return ureg_DECL_immediate_uint(ureg, (unsigned *)params->ParameterValues[index], 4);
3852 default:
3853 assert(!"should not get here - type must be float, int, uint, or bool");
3854 return ureg_src_undef();
3855 }
3856 }
3857
3858 /**
3859 * Map a Mesa dst register to a TGSI ureg_dst register.
3860 */
3861 static struct ureg_dst
3862 dst_register(struct st_translate *t,
3863 gl_register_file file,
3864 GLuint index)
3865 {
3866 switch(file) {
3867 case PROGRAM_UNDEFINED:
3868 return ureg_dst_undef();
3869
3870 case PROGRAM_TEMPORARY:
3871 if (ureg_dst_is_undef(t->temps[index]))
3872 t->temps[index] = ureg_DECL_temporary(t->ureg);
3873
3874 return t->temps[index];
3875
3876 case PROGRAM_OUTPUT:
3877 if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ)
3878 t->prevInstWrotePointSize = GL_TRUE;
3879
3880 if (t->procType == TGSI_PROCESSOR_VERTEX)
3881 assert(index < VERT_RESULT_MAX);
3882 else if (t->procType == TGSI_PROCESSOR_FRAGMENT)
3883 assert(index < FRAG_RESULT_MAX);
3884 else
3885 assert(index < GEOM_RESULT_MAX);
3886
3887 assert(t->outputMapping[index] < Elements(t->outputs));
3888
3889 return t->outputs[t->outputMapping[index]];
3890
3891 case PROGRAM_ADDRESS:
3892 return t->address[index];
3893
3894 default:
3895 assert(!"unknown dst register file");
3896 return ureg_dst_undef();
3897 }
3898 }
3899
3900 /**
3901 * Map a Mesa src register to a TGSI ureg_src register.
3902 */
3903 static struct ureg_src
3904 src_register(struct st_translate *t,
3905 gl_register_file file,
3906 GLuint index)
3907 {
3908 switch(file) {
3909 case PROGRAM_UNDEFINED:
3910 return ureg_src_undef();
3911
3912 case PROGRAM_TEMPORARY:
3913 assert(index >= 0);
3914 assert(index < Elements(t->temps));
3915 if (ureg_dst_is_undef(t->temps[index]))
3916 t->temps[index] = ureg_DECL_temporary(t->ureg);
3917 return ureg_src(t->temps[index]);
3918
3919 case PROGRAM_NAMED_PARAM:
3920 case PROGRAM_ENV_PARAM:
3921 case PROGRAM_LOCAL_PARAM:
3922 case PROGRAM_UNIFORM:
3923 assert(index >= 0);
3924 return t->constants[index];
3925 case PROGRAM_STATE_VAR:
3926 case PROGRAM_CONSTANT: /* ie, immediate */
3927 if (index < 0)
3928 return ureg_DECL_constant(t->ureg, 0);
3929 else
3930 return t->constants[index];
3931
3932 case PROGRAM_IMMEDIATE:
3933 return t->immediates[index];
3934
3935 case PROGRAM_INPUT:
3936 assert(t->inputMapping[index] < Elements(t->inputs));
3937 return t->inputs[t->inputMapping[index]];
3938
3939 case PROGRAM_OUTPUT:
3940 assert(t->outputMapping[index] < Elements(t->outputs));
3941 return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */
3942
3943 case PROGRAM_ADDRESS:
3944 return ureg_src(t->address[index]);
3945
3946 case PROGRAM_SYSTEM_VALUE:
3947 assert(index < Elements(t->systemValues));
3948 return t->systemValues[index];
3949
3950 default:
3951 assert(!"unknown src register file");
3952 return ureg_src_undef();
3953 }
3954 }
3955
3956 /**
3957 * Create a TGSI ureg_dst register from an st_dst_reg.
3958 */
3959 static struct ureg_dst
3960 translate_dst(struct st_translate *t,
3961 const st_dst_reg *dst_reg,
3962 bool saturate)
3963 {
3964 struct ureg_dst dst = dst_register(t,
3965 dst_reg->file,
3966 dst_reg->index);
3967
3968 dst = ureg_writemask(dst, dst_reg->writemask);
3969
3970 if (saturate)
3971 dst = ureg_saturate(dst);
3972
3973 if (dst_reg->reladdr != NULL)
3974 dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
3975
3976 return dst;
3977 }
3978
3979 /**
3980 * Create a TGSI ureg_src register from an st_src_reg.
3981 */
3982 static struct ureg_src
3983 translate_src(struct st_translate *t, const st_src_reg *src_reg)
3984 {
3985 struct ureg_src src = src_register(t, src_reg->file, src_reg->index);
3986
3987 src = ureg_swizzle(src,
3988 GET_SWZ(src_reg->swizzle, 0) & 0x3,
3989 GET_SWZ(src_reg->swizzle, 1) & 0x3,
3990 GET_SWZ(src_reg->swizzle, 2) & 0x3,
3991 GET_SWZ(src_reg->swizzle, 3) & 0x3);
3992
3993 if ((src_reg->negate & 0xf) == NEGATE_XYZW)
3994 src = ureg_negate(src);
3995
3996 if (src_reg->reladdr != NULL) {
3997 /* Normally ureg_src_indirect() would be used here, but a stupid compiler
3998 * bug in g++ makes ureg_src_indirect (an inline C function) erroneously
3999 * set the bit for src.Negate. So we have to do the operation manually
4000 * here to work around the compiler's problems. */
4001 /*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/
4002 struct ureg_src addr = ureg_src(t->address[0]);
4003 src.Indirect = 1;
4004 src.IndirectFile = addr.File;
4005 src.IndirectIndex = addr.Index;
4006 src.IndirectSwizzle = addr.SwizzleX;
4007
4008 if (src_reg->file != PROGRAM_INPUT &&
4009 src_reg->file != PROGRAM_OUTPUT) {
4010 /* If src_reg->index was negative, it was set to zero in
4011 * src_register(). Reassign it now. But don't do this
4012 * for input/output regs since they get remapped while
4013 * const buffers don't.
4014 */
4015 src.Index = src_reg->index;
4016 }
4017 }
4018
4019 return src;
4020 }
4021
4022 static void
4023 compile_tgsi_instruction(struct st_translate *t,
4024 const struct glsl_to_tgsi_instruction *inst)
4025 {
4026 struct ureg_program *ureg = t->ureg;
4027 GLuint i;
4028 struct ureg_dst dst[1];
4029 struct ureg_src src[4];
4030 unsigned num_dst;
4031 unsigned num_src;
4032
4033 num_dst = num_inst_dst_regs(inst->op);
4034 num_src = num_inst_src_regs(inst->op);
4035
4036 if (num_dst)
4037 dst[0] = translate_dst(t,
4038 &inst->dst,
4039 inst->saturate);
4040
4041 for (i = 0; i < num_src; i++)
4042 src[i] = translate_src(t, &inst->src[i]);
4043
4044 switch(inst->op) {
4045 case TGSI_OPCODE_BGNLOOP:
4046 case TGSI_OPCODE_CAL:
4047 case TGSI_OPCODE_ELSE:
4048 case TGSI_OPCODE_ENDLOOP:
4049 case TGSI_OPCODE_IF:
4050 assert(num_dst == 0);
4051 ureg_label_insn(ureg,
4052 inst->op,
4053 src, num_src,
4054 get_label(t,
4055 inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0));
4056 return;
4057
4058 case TGSI_OPCODE_TEX:
4059 case TGSI_OPCODE_TXB:
4060 case TGSI_OPCODE_TXD:
4061 case TGSI_OPCODE_TXL:
4062 case TGSI_OPCODE_TXP:
4063 src[num_src++] = t->samplers[inst->sampler];
4064 ureg_tex_insn(ureg,
4065 inst->op,
4066 dst, num_dst,
4067 translate_texture_target(inst->tex_target, inst->tex_shadow),
4068 src, num_src);
4069 return;
4070
4071 case TGSI_OPCODE_SCS:
4072 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY);
4073 ureg_insn(ureg, inst->op, dst, num_dst, src, num_src);
4074 break;
4075
4076 default:
4077 ureg_insn(ureg,
4078 inst->op,
4079 dst, num_dst,
4080 src, num_src);
4081 break;
4082 }
4083 }
4084
4085 /**
4086 * Emit the TGSI instructions to adjust the WPOS pixel center convention
4087 * Basically, add (adjX, adjY) to the fragment position.
4088 */
4089 static void
4090 emit_adjusted_wpos(struct st_translate *t,
4091 const struct gl_program *program,
4092 float adjX, float adjY)
4093 {
4094 struct ureg_program *ureg = t->ureg;
4095 struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg);
4096 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
4097
4098 /* Note that we bias X and Y and pass Z and W through unchanged.
4099 * The shader might also use gl_FragCoord.w and .z.
4100 */
4101 ureg_ADD(ureg, wpos_temp, wpos_input,
4102 ureg_imm4f(ureg, adjX, adjY, 0.0f, 0.0f));
4103
4104 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
4105 }
4106
4107
4108 /**
4109 * Emit the TGSI instructions for inverting the WPOS y coordinate.
4110 * This code is unavoidable because it also depends on whether
4111 * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
4112 */
4113 static void
4114 emit_wpos_inversion(struct st_translate *t,
4115 const struct gl_program *program,
4116 bool invert)
4117 {
4118 struct ureg_program *ureg = t->ureg;
4119
4120 /* Fragment program uses fragment position input.
4121 * Need to replace instances of INPUT[WPOS] with temp T
4122 * where T = INPUT[WPOS] by y is inverted.
4123 */
4124 static const gl_state_index wposTransformState[STATE_LENGTH]
4125 = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM,
4126 (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 };
4127
4128 /* XXX: note we are modifying the incoming shader here! Need to
4129 * do this before emitting the constant decls below, or this
4130 * will be missed:
4131 */
4132 unsigned wposTransConst = _mesa_add_state_reference(program->Parameters,
4133 wposTransformState);
4134
4135 struct ureg_src wpostrans = ureg_DECL_constant(ureg, wposTransConst);
4136 struct ureg_dst wpos_temp;
4137 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
4138
4139 /* MOV wpos_temp, input[wpos]
4140 */
4141 if (wpos_input.File == TGSI_FILE_TEMPORARY)
4142 wpos_temp = ureg_dst(wpos_input);
4143 else {
4144 wpos_temp = ureg_DECL_temporary(ureg);
4145 ureg_MOV(ureg, wpos_temp, wpos_input);
4146 }
4147
4148 if (invert) {
4149 /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
4150 */
4151 ureg_MAD(ureg,
4152 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
4153 wpos_input,
4154 ureg_scalar(wpostrans, 0),
4155 ureg_scalar(wpostrans, 1));
4156 } else {
4157 /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
4158 */
4159 ureg_MAD(ureg,
4160 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
4161 wpos_input,
4162 ureg_scalar(wpostrans, 2),
4163 ureg_scalar(wpostrans, 3));
4164 }
4165
4166 /* Use wpos_temp as position input from here on:
4167 */
4168 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
4169 }
4170
4171
4172 /**
4173 * Emit fragment position/coordinate code.
4174 */
4175 static void
4176 emit_wpos(struct st_context *st,
4177 struct st_translate *t,
4178 const struct gl_program *program,
4179 struct ureg_program *ureg)
4180 {
4181 const struct gl_fragment_program *fp =
4182 (const struct gl_fragment_program *) program;
4183 struct pipe_screen *pscreen = st->pipe->screen;
4184 boolean invert = FALSE;
4185
4186 if (fp->OriginUpperLeft) {
4187 /* Fragment shader wants origin in upper-left */
4188 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
4189 /* the driver supports upper-left origin */
4190 }
4191 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
4192 /* the driver supports lower-left origin, need to invert Y */
4193 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
4194 invert = TRUE;
4195 }
4196 else
4197 assert(0);
4198 }
4199 else {
4200 /* Fragment shader wants origin in lower-left */
4201 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
4202 /* the driver supports lower-left origin */
4203 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
4204 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
4205 /* the driver supports upper-left origin, need to invert Y */
4206 invert = TRUE;
4207 else
4208 assert(0);
4209 }
4210
4211 if (fp->PixelCenterInteger) {
4212 /* Fragment shader wants pixel center integer */
4213 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER))
4214 /* the driver supports pixel center integer */
4215 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
4216 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER))
4217 /* the driver supports pixel center half integer, need to bias X,Y */
4218 emit_adjusted_wpos(t, program, 0.5f, invert ? 0.5f : -0.5f);
4219 else
4220 assert(0);
4221 }
4222 else {
4223 /* Fragment shader wants pixel center half integer */
4224 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
4225 /* the driver supports pixel center half integer */
4226 }
4227 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
4228 /* the driver supports pixel center integer, need to bias X,Y */
4229 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
4230 emit_adjusted_wpos(t, program, 0.5f, invert ? -0.5f : 0.5f);
4231 }
4232 else
4233 assert(0);
4234 }
4235
4236 /* we invert after adjustment so that we avoid the MOV to temporary,
4237 * and reuse the adjustment ADD instead */
4238 emit_wpos_inversion(t, program, invert);
4239 }
4240
4241 /**
4242 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back.
4243 * TGSI uses +1 for front, -1 for back.
4244 * This function converts the TGSI value to the GL value. Simply clamping/
4245 * saturating the value to [0,1] does the job.
4246 */
4247 static void
4248 emit_face_var(struct st_translate *t)
4249 {
4250 struct ureg_program *ureg = t->ureg;
4251 struct ureg_dst face_temp = ureg_DECL_temporary(ureg);
4252 struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]];
4253
4254 /* MOV_SAT face_temp, input[face] */
4255 face_temp = ureg_saturate(face_temp);
4256 ureg_MOV(ureg, face_temp, face_input);
4257
4258 /* Use face_temp as face input from here on: */
4259 t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp);
4260 }
4261
4262 static void
4263 emit_edgeflags(struct st_translate *t)
4264 {
4265 struct ureg_program *ureg = t->ureg;
4266 struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]];
4267 struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]];
4268
4269 ureg_MOV(ureg, edge_dst, edge_src);
4270 }
4271
4272 /**
4273 * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
4274 * \param program the program to translate
4275 * \param numInputs number of input registers used
4276 * \param inputMapping maps Mesa fragment program inputs to TGSI generic
4277 * input indexes
4278 * \param inputSemanticName the TGSI_SEMANTIC flag for each input
4279 * \param inputSemanticIndex the semantic index (ex: which texcoord) for
4280 * each input
4281 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
4282 * \param numOutputs number of output registers used
4283 * \param outputMapping maps Mesa fragment program outputs to TGSI
4284 * generic outputs
4285 * \param outputSemanticName the TGSI_SEMANTIC flag for each output
4286 * \param outputSemanticIndex the semantic index (ex: which texcoord) for
4287 * each output
4288 *
4289 * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
4290 */
4291 extern "C" enum pipe_error
4292 st_translate_program(
4293 struct gl_context *ctx,
4294 uint procType,
4295 struct ureg_program *ureg,
4296 glsl_to_tgsi_visitor *program,
4297 const struct gl_program *proginfo,
4298 GLuint numInputs,
4299 const GLuint inputMapping[],
4300 const ubyte inputSemanticName[],
4301 const ubyte inputSemanticIndex[],
4302 const GLuint interpMode[],
4303 GLuint numOutputs,
4304 const GLuint outputMapping[],
4305 const ubyte outputSemanticName[],
4306 const ubyte outputSemanticIndex[],
4307 boolean passthrough_edgeflags)
4308 {
4309 struct st_translate translate, *t;
4310 unsigned i;
4311 enum pipe_error ret = PIPE_OK;
4312
4313 assert(numInputs <= Elements(t->inputs));
4314 assert(numOutputs <= Elements(t->outputs));
4315
4316 t = &translate;
4317 memset(t, 0, sizeof *t);
4318
4319 t->procType = procType;
4320 t->inputMapping = inputMapping;
4321 t->outputMapping = outputMapping;
4322 t->ureg = ureg;
4323 t->pointSizeOutIndex = -1;
4324 t->prevInstWrotePointSize = GL_FALSE;
4325
4326 /*
4327 * Declare input attributes.
4328 */
4329 if (procType == TGSI_PROCESSOR_FRAGMENT) {
4330 for (i = 0; i < numInputs; i++) {
4331 t->inputs[i] = ureg_DECL_fs_input(ureg,
4332 inputSemanticName[i],
4333 inputSemanticIndex[i],
4334 interpMode[i]);
4335 }
4336
4337 if (proginfo->InputsRead & FRAG_BIT_WPOS) {
4338 /* Must do this after setting up t->inputs, and before
4339 * emitting constant references, below:
4340 */
4341 emit_wpos(st_context(ctx), t, proginfo, ureg);
4342 }
4343
4344 if (proginfo->InputsRead & FRAG_BIT_FACE)
4345 emit_face_var(t);
4346
4347 /*
4348 * Declare output attributes.
4349 */
4350 for (i = 0; i < numOutputs; i++) {
4351 switch (outputSemanticName[i]) {
4352 case TGSI_SEMANTIC_POSITION:
4353 t->outputs[i] = ureg_DECL_output(ureg,
4354 TGSI_SEMANTIC_POSITION, /* Z/Depth */
4355 outputSemanticIndex[i]);
4356 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z);
4357 break;
4358 case TGSI_SEMANTIC_STENCIL:
4359 t->outputs[i] = ureg_DECL_output(ureg,
4360 TGSI_SEMANTIC_STENCIL, /* Stencil */
4361 outputSemanticIndex[i]);
4362 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y);
4363 break;
4364 case TGSI_SEMANTIC_COLOR:
4365 t->outputs[i] = ureg_DECL_output(ureg,
4366 TGSI_SEMANTIC_COLOR,
4367 outputSemanticIndex[i]);
4368 break;
4369 default:
4370 assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR");
4371 return PIPE_ERROR_BAD_INPUT;
4372 }
4373 }
4374 }
4375 else if (procType == TGSI_PROCESSOR_GEOMETRY) {
4376 for (i = 0; i < numInputs; i++) {
4377 t->inputs[i] = ureg_DECL_gs_input(ureg,
4378 i,
4379 inputSemanticName[i],
4380 inputSemanticIndex[i]);
4381 }
4382
4383 for (i = 0; i < numOutputs; i++) {
4384 t->outputs[i] = ureg_DECL_output(ureg,
4385 outputSemanticName[i],
4386 outputSemanticIndex[i]);
4387 }
4388 }
4389 else {
4390 assert(procType == TGSI_PROCESSOR_VERTEX);
4391
4392 for (i = 0; i < numInputs; i++) {
4393 t->inputs[i] = ureg_DECL_vs_input(ureg, i);
4394 }
4395
4396 for (i = 0; i < numOutputs; i++) {
4397 t->outputs[i] = ureg_DECL_output(ureg,
4398 outputSemanticName[i],
4399 outputSemanticIndex[i]);
4400 if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) {
4401 /* Writing to the point size result register requires special
4402 * handling to implement clamping.
4403 */
4404 static const gl_state_index pointSizeClampState[STATE_LENGTH]
4405 = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 };
4406 /* XXX: note we are modifying the incoming shader here! Need to
4407 * do this before emitting the constant decls below, or this
4408 * will be missed.
4409 */
4410 unsigned pointSizeClampConst =
4411 _mesa_add_state_reference(proginfo->Parameters,
4412 pointSizeClampState);
4413 struct ureg_dst psizregtemp = ureg_DECL_temporary(ureg);
4414 t->pointSizeConst = ureg_DECL_constant(ureg, pointSizeClampConst);
4415 t->pointSizeResult = t->outputs[i];
4416 t->pointSizeOutIndex = i;
4417 t->outputs[i] = psizregtemp;
4418 }
4419 }
4420 if (passthrough_edgeflags)
4421 emit_edgeflags(t);
4422 }
4423
4424 /* Declare address register.
4425 */
4426 if (program->num_address_regs > 0) {
4427 assert(program->num_address_regs == 1);
4428 t->address[0] = ureg_DECL_address(ureg);
4429 }
4430
4431 /* Declare misc input registers
4432 */
4433 {
4434 GLbitfield sysInputs = proginfo->SystemValuesRead;
4435 unsigned numSys = 0;
4436 for (i = 0; sysInputs; i++) {
4437 if (sysInputs & (1 << i)) {
4438 unsigned semName = mesa_sysval_to_semantic[i];
4439 t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0);
4440 numSys++;
4441 sysInputs &= ~(1 << i);
4442 }
4443 }
4444 }
4445
4446 if (program->indirect_addr_temps) {
4447 /* If temps are accessed with indirect addressing, declare temporaries
4448 * in sequential order. Else, we declare them on demand elsewhere.
4449 * (Note: the number of temporaries is equal to program->next_temp)
4450 */
4451 for (i = 0; i < (unsigned)program->next_temp; i++) {
4452 /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
4453 t->temps[i] = ureg_DECL_temporary(t->ureg);
4454 }
4455 }
4456
4457 /* Emit constants and uniforms. TGSI uses a single index space for these,
4458 * so we put all the translated regs in t->constants.
4459 */
4460 if (proginfo->Parameters) {
4461 t->constants = (struct ureg_src *)CALLOC(proginfo->Parameters->NumParameters * sizeof(t->constants[0]));
4462 if (t->constants == NULL) {
4463 ret = PIPE_ERROR_OUT_OF_MEMORY;
4464 goto out;
4465 }
4466
4467 for (i = 0; i < proginfo->Parameters->NumParameters; i++) {
4468 switch (proginfo->Parameters->Parameters[i].Type) {
4469 case PROGRAM_ENV_PARAM:
4470 case PROGRAM_LOCAL_PARAM:
4471 case PROGRAM_STATE_VAR:
4472 case PROGRAM_NAMED_PARAM:
4473 case PROGRAM_UNIFORM:
4474 t->constants[i] = ureg_DECL_constant(ureg, i);
4475 break;
4476
4477 /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect
4478 * addressing of the const buffer.
4479 * FIXME: Be smarter and recognize param arrays:
4480 * indirect addressing is only valid within the referenced
4481 * array.
4482 */
4483 case PROGRAM_CONSTANT:
4484 if (program->indirect_addr_consts)
4485 t->constants[i] = ureg_DECL_constant(ureg, i);
4486 else
4487 t->constants[i] = emit_immediate(t, proginfo->Parameters, i);
4488 break;
4489 default:
4490 break;
4491 }
4492 }
4493 }
4494
4495 /* Emit immediate values.
4496 */
4497 t->immediates = (struct ureg_src *)CALLOC(program->immediates->NumParameters * sizeof(struct ureg_src));
4498 if (t->immediates == NULL) {
4499 ret = PIPE_ERROR_OUT_OF_MEMORY;
4500 goto out;
4501 }
4502 for (i = 0; i < program->immediates->NumParameters; i++) {
4503 assert(program->immediates->Parameters[i].Type == PROGRAM_IMMEDIATE);
4504 t->immediates[i] = emit_immediate(t, program->immediates, i);
4505 }
4506
4507 /* texture samplers */
4508 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
4509 if (program->samplers_used & (1 << i)) {
4510 t->samplers[i] = ureg_DECL_sampler(ureg, i);
4511 }
4512 }
4513
4514 /* Emit each instruction in turn:
4515 */
4516 foreach_iter(exec_list_iterator, iter, program->instructions) {
4517 set_insn_start(t, ureg_get_instruction_number(ureg));
4518 compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get());
4519
4520 if (t->prevInstWrotePointSize && proginfo->Id) {
4521 /* The previous instruction wrote to the (fake) vertex point size
4522 * result register. Now we need to clamp that value to the min/max
4523 * point size range, putting the result into the real point size
4524 * register.
4525 * Note that we can't do this easily at the end of program due to
4526 * possible early return.
4527 */
4528 set_insn_start(t, ureg_get_instruction_number(ureg));
4529 ureg_MAX(t->ureg,
4530 ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X),
4531 ureg_src(t->outputs[t->pointSizeOutIndex]),
4532 ureg_swizzle(t->pointSizeConst, 1,1,1,1));
4533 ureg_MIN(t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X),
4534 ureg_src(t->outputs[t->pointSizeOutIndex]),
4535 ureg_swizzle(t->pointSizeConst, 2,2,2,2));
4536 }
4537 t->prevInstWrotePointSize = GL_FALSE;
4538 }
4539
4540 /* Fix up all emitted labels:
4541 */
4542 for (i = 0; i < t->labels_count; i++) {
4543 ureg_fixup_label(ureg, t->labels[i].token,
4544 t->insn[t->labels[i].branch_target]);
4545 }
4546
4547 out:
4548 FREE(t->insn);
4549 FREE(t->labels);
4550 FREE(t->constants);
4551 FREE(t->immediates);
4552
4553 if (t->error) {
4554 debug_printf("%s: translate error flag set\n", __FUNCTION__);
4555 }
4556
4557 return ret;
4558 }
4559 /* ----------------------------- End TGSI code ------------------------------ */
4560
4561 /**
4562 * Convert a shader's GLSL IR into a Mesa gl_program, although without
4563 * generating Mesa IR.
4564 */
4565 static struct gl_program *
4566 get_mesa_program(struct gl_context *ctx,
4567 struct gl_shader_program *shader_program,
4568 struct gl_shader *shader)
4569 {
4570 glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor();
4571 struct gl_program *prog;
4572 GLenum target;
4573 const char *target_string;
4574 bool progress;
4575 struct gl_shader_compiler_options *options =
4576 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)];
4577
4578 switch (shader->Type) {
4579 case GL_VERTEX_SHADER:
4580 target = GL_VERTEX_PROGRAM_ARB;
4581 target_string = "vertex";
4582 break;
4583 case GL_FRAGMENT_SHADER:
4584 target = GL_FRAGMENT_PROGRAM_ARB;
4585 target_string = "fragment";
4586 break;
4587 case GL_GEOMETRY_SHADER:
4588 target = GL_GEOMETRY_PROGRAM_NV;
4589 target_string = "geometry";
4590 break;
4591 default:
4592 assert(!"should not be reached");
4593 return NULL;
4594 }
4595
4596 validate_ir_tree(shader->ir);
4597
4598 prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
4599 if (!prog)
4600 return NULL;
4601 prog->Parameters = _mesa_new_parameter_list();
4602 prog->Varying = _mesa_new_parameter_list();
4603 prog->Attributes = _mesa_new_parameter_list();
4604 v->ctx = ctx;
4605 v->prog = prog;
4606 v->shader_program = shader_program;
4607 v->options = options;
4608 v->glsl_version = ctx->Const.GLSLVersion;
4609
4610 add_uniforms_to_parameters_list(shader_program, shader, prog);
4611
4612 /* Emit intermediate IR for main(). */
4613 visit_exec_list(shader->ir, v);
4614
4615 /* Now emit bodies for any functions that were used. */
4616 do {
4617 progress = GL_FALSE;
4618
4619 foreach_iter(exec_list_iterator, iter, v->function_signatures) {
4620 function_entry *entry = (function_entry *)iter.get();
4621
4622 if (!entry->bgn_inst) {
4623 v->current_function = entry;
4624
4625 entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB);
4626 entry->bgn_inst->function = entry;
4627
4628 visit_exec_list(&entry->sig->body, v);
4629
4630 glsl_to_tgsi_instruction *last;
4631 last = (glsl_to_tgsi_instruction *)v->instructions.get_tail();
4632 if (last->op != TGSI_OPCODE_RET)
4633 v->emit(NULL, TGSI_OPCODE_RET);
4634
4635 glsl_to_tgsi_instruction *end;
4636 end = v->emit(NULL, TGSI_OPCODE_ENDSUB);
4637 end->function = entry;
4638
4639 progress = GL_TRUE;
4640 }
4641 }
4642 } while (progress);
4643
4644 #if 0
4645 /* Print out some information (for debugging purposes) used by the
4646 * optimization passes. */
4647 for (i=0; i < v->next_temp; i++) {
4648 int fr = v->get_first_temp_read(i);
4649 int fw = v->get_first_temp_write(i);
4650 int lr = v->get_last_temp_read(i);
4651 int lw = v->get_last_temp_write(i);
4652
4653 printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw);
4654 assert(fw <= fr);
4655 }
4656 #endif
4657
4658 /* Remove reads to output registers, and to varyings in vertex shaders. */
4659 v->remove_output_reads(PROGRAM_OUTPUT);
4660 if (target == GL_VERTEX_PROGRAM_ARB)
4661 v->remove_output_reads(PROGRAM_VARYING);
4662
4663 /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
4664 v->simplify_cmp();
4665 v->copy_propagate();
4666 while (v->eliminate_dead_code_advanced());
4667
4668 /* FIXME: These passes to optimize temporary registers don't work when there
4669 * is indirect addressing of the temporary register space. We need proper
4670 * array support so that we don't have to give up these passes in every
4671 * shader that uses arrays.
4672 */
4673 if (!v->indirect_addr_temps) {
4674 v->eliminate_dead_code();
4675 v->merge_registers();
4676 v->renumber_registers();
4677 }
4678
4679 /* Write the END instruction. */
4680 v->emit(NULL, TGSI_OPCODE_END);
4681
4682 if (ctx->Shader.Flags & GLSL_DUMP) {
4683 printf("\n");
4684 printf("GLSL IR for linked %s program %d:\n", target_string,
4685 shader_program->Name);
4686 _mesa_print_ir(shader->ir, NULL);
4687 printf("\n");
4688 printf("\n");
4689 }
4690
4691 prog->Instructions = NULL;
4692 prog->NumInstructions = 0;
4693
4694 do_set_program_inouts(shader->ir, prog);
4695 count_resources(v, prog);
4696
4697 check_resources(ctx, shader_program, v, prog);
4698
4699 _mesa_reference_program(ctx, &shader->Program, prog);
4700
4701 struct st_vertex_program *stvp;
4702 struct st_fragment_program *stfp;
4703 struct st_geometry_program *stgp;
4704
4705 switch (shader->Type) {
4706 case GL_VERTEX_SHADER:
4707 stvp = (struct st_vertex_program *)prog;
4708 stvp->glsl_to_tgsi = v;
4709 break;
4710 case GL_FRAGMENT_SHADER:
4711 stfp = (struct st_fragment_program *)prog;
4712 stfp->glsl_to_tgsi = v;
4713 break;
4714 case GL_GEOMETRY_SHADER:
4715 stgp = (struct st_geometry_program *)prog;
4716 stgp->glsl_to_tgsi = v;
4717 break;
4718 default:
4719 assert(!"should not be reached");
4720 return NULL;
4721 }
4722
4723 return prog;
4724 }
4725
4726 extern "C" {
4727
4728 struct gl_shader *
4729 st_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
4730 {
4731 struct gl_shader *shader;
4732 assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER ||
4733 type == GL_GEOMETRY_SHADER_ARB);
4734 shader = rzalloc(NULL, struct gl_shader);
4735 if (shader) {
4736 shader->Type = type;
4737 shader->Name = name;
4738 _mesa_init_shader(ctx, shader);
4739 }
4740 return shader;
4741 }
4742
4743 struct gl_shader_program *
4744 st_new_shader_program(struct gl_context *ctx, GLuint name)
4745 {
4746 struct gl_shader_program *shProg;
4747 shProg = rzalloc(NULL, struct gl_shader_program);
4748 if (shProg) {
4749 shProg->Name = name;
4750 _mesa_init_shader_program(ctx, shProg);
4751 }
4752 return shProg;
4753 }
4754
4755 /**
4756 * Link a shader.
4757 * Called via ctx->Driver.LinkShader()
4758 * This actually involves converting GLSL IR into an intermediate TGSI-like IR
4759 * with code lowering and other optimizations.
4760 */
4761 GLboolean
4762 st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
4763 {
4764 assert(prog->LinkStatus);
4765
4766 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
4767 if (prog->_LinkedShaders[i] == NULL)
4768 continue;
4769
4770 bool progress;
4771 exec_list *ir = prog->_LinkedShaders[i]->ir;
4772 const struct gl_shader_compiler_options *options =
4773 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)];
4774
4775 do {
4776 progress = false;
4777
4778 /* Lowering */
4779 do_mat_op_to_vec(ir);
4780 lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
4781 | LOG_TO_LOG2
4782 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
4783
4784 progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
4785
4786 progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress;
4787
4788 progress = lower_quadop_vector(ir, true) || progress;
4789
4790 if (options->EmitNoIfs) {
4791 progress = lower_discard(ir) || progress;
4792 progress = lower_if_to_cond_assign(ir) || progress;
4793 }
4794
4795 if (options->EmitNoNoise)
4796 progress = lower_noise(ir) || progress;
4797
4798 /* If there are forms of indirect addressing that the driver
4799 * cannot handle, perform the lowering pass.
4800 */
4801 if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput
4802 || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform)
4803 progress =
4804 lower_variable_index_to_cond_assign(ir,
4805 options->EmitNoIndirectInput,
4806 options->EmitNoIndirectOutput,
4807 options->EmitNoIndirectTemp,
4808 options->EmitNoIndirectUniform)
4809 || progress;
4810
4811 progress = do_vec_index_to_cond_assign(ir) || progress;
4812 } while (progress);
4813
4814 validate_ir_tree(ir);
4815 }
4816
4817 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
4818 struct gl_program *linked_prog;
4819
4820 if (prog->_LinkedShaders[i] == NULL)
4821 continue;
4822
4823 linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
4824
4825 if (linked_prog) {
4826 bool ok = true;
4827
4828 switch (prog->_LinkedShaders[i]->Type) {
4829 case GL_VERTEX_SHADER:
4830 _mesa_reference_vertprog(ctx, &prog->VertexProgram,
4831 (struct gl_vertex_program *)linked_prog);
4832 ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB,
4833 linked_prog);
4834 break;
4835 case GL_FRAGMENT_SHADER:
4836 _mesa_reference_fragprog(ctx, &prog->FragmentProgram,
4837 (struct gl_fragment_program *)linked_prog);
4838 ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB,
4839 linked_prog);
4840 break;
4841 case GL_GEOMETRY_SHADER:
4842 _mesa_reference_geomprog(ctx, &prog->GeometryProgram,
4843 (struct gl_geometry_program *)linked_prog);
4844 ok = ctx->Driver.ProgramStringNotify(ctx, GL_GEOMETRY_PROGRAM_NV,
4845 linked_prog);
4846 break;
4847 }
4848 if (!ok) {
4849 return GL_FALSE;
4850 }
4851 }
4852
4853 _mesa_reference_program(ctx, &linked_prog, NULL);
4854 }
4855
4856 return GL_TRUE;
4857 }
4858
4859
4860 /**
4861 * Link a GLSL shader program. Called via glLinkProgram().
4862 */
4863 void
4864 st_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
4865 {
4866 unsigned int i;
4867
4868 _mesa_clear_shader_program_data(ctx, prog);
4869
4870 prog->LinkStatus = GL_TRUE;
4871
4872 for (i = 0; i < prog->NumShaders; i++) {
4873 if (!prog->Shaders[i]->CompileStatus) {
4874 fail_link(prog, "linking with uncompiled shader");
4875 prog->LinkStatus = GL_FALSE;
4876 }
4877 }
4878
4879 prog->Varying = _mesa_new_parameter_list();
4880 _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL);
4881 _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL);
4882 _mesa_reference_geomprog(ctx, &prog->GeometryProgram, NULL);
4883
4884 if (prog->LinkStatus) {
4885 link_shaders(ctx, prog);
4886 }
4887
4888 if (prog->LinkStatus) {
4889 if (!ctx->Driver.LinkShader(ctx, prog)) {
4890 prog->LinkStatus = GL_FALSE;
4891 }
4892 }
4893
4894 set_uniform_initializers(ctx, prog);
4895
4896 if (ctx->Shader.Flags & GLSL_DUMP) {
4897 if (!prog->LinkStatus) {
4898 printf("GLSL shader program %d failed to link\n", prog->Name);
4899 }
4900
4901 if (prog->InfoLog && prog->InfoLog[0] != 0) {
4902 printf("GLSL shader program %d info log:\n", prog->Name);
4903 printf("%s\n", prog->InfoLog);
4904 }
4905 }
4906 }
4907
4908 } /* extern "C" */