glsl_to_tgsi: fix more potential shader reference leaks
[mesa.git] / src / mesa / state_tracker / st_glsl_to_tgsi.cpp
1 /*
2 * Copyright (C) 2005-2007 Brian Paul All Rights Reserved.
3 * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
4 * Copyright © 2010 Intel Corporation
5 * Copyright © 2011 Bryan Cain
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 * DEALINGS IN THE SOFTWARE.
25 */
26
27 /**
28 * \file glsl_to_tgsi.cpp
29 *
30 * Translate GLSL IR to TGSI.
31 */
32
33 #include <stdio.h>
34 #include "main/compiler.h"
35 #include "ir.h"
36 #include "ir_visitor.h"
37 #include "ir_print_visitor.h"
38 #include "ir_expression_flattening.h"
39 #include "glsl_types.h"
40 #include "glsl_parser_extras.h"
41 #include "../glsl/program.h"
42 #include "ir_optimization.h"
43 #include "ast.h"
44
45 extern "C" {
46 #include "main/mtypes.h"
47 #include "main/shaderapi.h"
48 #include "main/shaderobj.h"
49 #include "main/uniforms.h"
50 #include "program/hash_table.h"
51 #include "program/prog_instruction.h"
52 #include "program/prog_optimize.h"
53 #include "program/prog_print.h"
54 #include "program/program.h"
55 #include "program/prog_uniform.h"
56 #include "program/prog_parameter.h"
57 #include "program/sampler.h"
58
59 #include "pipe/p_compiler.h"
60 #include "pipe/p_context.h"
61 #include "pipe/p_screen.h"
62 #include "pipe/p_shader_tokens.h"
63 #include "pipe/p_state.h"
64 #include "util/u_math.h"
65 #include "tgsi/tgsi_ureg.h"
66 #include "tgsi/tgsi_info.h"
67 #include "st_context.h"
68 #include "st_program.h"
69 #include "st_glsl_to_tgsi.h"
70 #include "st_mesa_to_tgsi.h"
71 }
72
73 #define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX
74 #define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) | \
75 (1 << PROGRAM_ENV_PARAM) | \
76 (1 << PROGRAM_STATE_VAR) | \
77 (1 << PROGRAM_NAMED_PARAM) | \
78 (1 << PROGRAM_CONSTANT) | \
79 (1 << PROGRAM_UNIFORM))
80
81 #define MAX_TEMPS 4096
82
83 /* will be 4 for GLSL 4.00 */
84 #define MAX_GLSL_TEXTURE_OFFSET 1
85
86 class st_src_reg;
87 class st_dst_reg;
88
89 static int swizzle_for_size(int size);
90
91 /**
92 * This struct is a corresponding struct to TGSI ureg_src.
93 */
94 class st_src_reg {
95 public:
96 st_src_reg(gl_register_file file, int index, const glsl_type *type)
97 {
98 this->file = file;
99 this->index = index;
100 if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
101 this->swizzle = swizzle_for_size(type->vector_elements);
102 else
103 this->swizzle = SWIZZLE_XYZW;
104 this->negate = 0;
105 this->type = type ? type->base_type : GLSL_TYPE_ERROR;
106 this->reladdr = NULL;
107 }
108
109 st_src_reg(gl_register_file file, int index, int type)
110 {
111 this->type = type;
112 this->file = file;
113 this->index = index;
114 this->swizzle = SWIZZLE_XYZW;
115 this->negate = 0;
116 this->reladdr = NULL;
117 }
118
119 st_src_reg()
120 {
121 this->type = GLSL_TYPE_ERROR;
122 this->file = PROGRAM_UNDEFINED;
123 this->index = 0;
124 this->swizzle = 0;
125 this->negate = 0;
126 this->reladdr = NULL;
127 }
128
129 explicit st_src_reg(st_dst_reg reg);
130
131 gl_register_file file; /**< PROGRAM_* from Mesa */
132 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
133 GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
134 int negate; /**< NEGATE_XYZW mask from mesa */
135 int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
136 /** Register index should be offset by the integer in this reg. */
137 st_src_reg *reladdr;
138 };
139
140 class st_dst_reg {
141 public:
142 st_dst_reg(gl_register_file file, int writemask, int type)
143 {
144 this->file = file;
145 this->index = 0;
146 this->writemask = writemask;
147 this->cond_mask = COND_TR;
148 this->reladdr = NULL;
149 this->type = type;
150 }
151
152 st_dst_reg()
153 {
154 this->type = GLSL_TYPE_ERROR;
155 this->file = PROGRAM_UNDEFINED;
156 this->index = 0;
157 this->writemask = 0;
158 this->cond_mask = COND_TR;
159 this->reladdr = NULL;
160 }
161
162 explicit st_dst_reg(st_src_reg reg);
163
164 gl_register_file file; /**< PROGRAM_* from Mesa */
165 int index; /**< temporary index, VERT_ATTRIB_*, FRAG_ATTRIB_*, etc. */
166 int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
167 GLuint cond_mask:4;
168 int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
169 /** Register index should be offset by the integer in this reg. */
170 st_src_reg *reladdr;
171 };
172
173 st_src_reg::st_src_reg(st_dst_reg reg)
174 {
175 this->type = reg.type;
176 this->file = reg.file;
177 this->index = reg.index;
178 this->swizzle = SWIZZLE_XYZW;
179 this->negate = 0;
180 this->reladdr = reg.reladdr;
181 }
182
183 st_dst_reg::st_dst_reg(st_src_reg reg)
184 {
185 this->type = reg.type;
186 this->file = reg.file;
187 this->index = reg.index;
188 this->writemask = WRITEMASK_XYZW;
189 this->cond_mask = COND_TR;
190 this->reladdr = reg.reladdr;
191 }
192
193 class glsl_to_tgsi_instruction : public exec_node {
194 public:
195 /* Callers of this ralloc-based new need not call delete. It's
196 * easier to just ralloc_free 'ctx' (or any of its ancestors). */
197 static void* operator new(size_t size, void *ctx)
198 {
199 void *node;
200
201 node = rzalloc_size(ctx, size);
202 assert(node != NULL);
203
204 return node;
205 }
206
207 unsigned op;
208 st_dst_reg dst;
209 st_src_reg src[3];
210 /** Pointer to the ir source this tree came from for debugging */
211 ir_instruction *ir;
212 GLboolean cond_update;
213 bool saturate;
214 int sampler; /**< sampler index */
215 int tex_target; /**< One of TEXTURE_*_INDEX */
216 GLboolean tex_shadow;
217 struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
218 unsigned tex_offset_num_offset;
219 int dead_mask; /**< Used in dead code elimination */
220
221 class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
222 };
223
224 class variable_storage : public exec_node {
225 public:
226 variable_storage(ir_variable *var, gl_register_file file, int index)
227 : file(file), index(index), var(var)
228 {
229 /* empty */
230 }
231
232 gl_register_file file;
233 int index;
234 ir_variable *var; /* variable that maps to this, if any */
235 };
236
237 class immediate_storage : public exec_node {
238 public:
239 immediate_storage(gl_constant_value *values, int size, int type)
240 {
241 memcpy(this->values, values, size * sizeof(gl_constant_value));
242 this->size = size;
243 this->type = type;
244 }
245
246 gl_constant_value values[4];
247 int size; /**< Number of components (1-4) */
248 int type; /**< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
249 };
250
251 class function_entry : public exec_node {
252 public:
253 ir_function_signature *sig;
254
255 /**
256 * identifier of this function signature used by the program.
257 *
258 * At the point that TGSI instructions for function calls are
259 * generated, we don't know the address of the first instruction of
260 * the function body. So we make the BranchTarget that is called a
261 * small integer and rewrite them during set_branchtargets().
262 */
263 int sig_id;
264
265 /**
266 * Pointer to first instruction of the function body.
267 *
268 * Set during function body emits after main() is processed.
269 */
270 glsl_to_tgsi_instruction *bgn_inst;
271
272 /**
273 * Index of the first instruction of the function body in actual TGSI.
274 *
275 * Set after conversion from glsl_to_tgsi_instruction to TGSI.
276 */
277 int inst;
278
279 /** Storage for the return value. */
280 st_src_reg return_reg;
281 };
282
283 class glsl_to_tgsi_visitor : public ir_visitor {
284 public:
285 glsl_to_tgsi_visitor();
286 ~glsl_to_tgsi_visitor();
287
288 function_entry *current_function;
289
290 struct gl_context *ctx;
291 struct gl_program *prog;
292 struct gl_shader_program *shader_program;
293 struct gl_shader_compiler_options *options;
294
295 int next_temp;
296
297 int num_address_regs;
298 int samplers_used;
299 bool indirect_addr_temps;
300 bool indirect_addr_consts;
301
302 int glsl_version;
303 bool native_integers;
304
305 variable_storage *find_variable_storage(ir_variable *var);
306
307 int add_constant(gl_register_file file, gl_constant_value values[4],
308 int size, int datatype, GLuint *swizzle_out);
309
310 function_entry *get_function_signature(ir_function_signature *sig);
311
312 st_src_reg get_temp(const glsl_type *type);
313 void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);
314
315 st_src_reg st_src_reg_for_float(float val);
316 st_src_reg st_src_reg_for_int(int val);
317 st_src_reg st_src_reg_for_type(int type, int val);
318
319 /**
320 * \name Visit methods
321 *
322 * As typical for the visitor pattern, there must be one \c visit method for
323 * each concrete subclass of \c ir_instruction. Virtual base classes within
324 * the hierarchy should not have \c visit methods.
325 */
326 /*@{*/
327 virtual void visit(ir_variable *);
328 virtual void visit(ir_loop *);
329 virtual void visit(ir_loop_jump *);
330 virtual void visit(ir_function_signature *);
331 virtual void visit(ir_function *);
332 virtual void visit(ir_expression *);
333 virtual void visit(ir_swizzle *);
334 virtual void visit(ir_dereference_variable *);
335 virtual void visit(ir_dereference_array *);
336 virtual void visit(ir_dereference_record *);
337 virtual void visit(ir_assignment *);
338 virtual void visit(ir_constant *);
339 virtual void visit(ir_call *);
340 virtual void visit(ir_return *);
341 virtual void visit(ir_discard *);
342 virtual void visit(ir_texture *);
343 virtual void visit(ir_if *);
344 /*@}*/
345
346 st_src_reg result;
347
348 /** List of variable_storage */
349 exec_list variables;
350
351 /** List of immediate_storage */
352 exec_list immediates;
353 int num_immediates;
354
355 /** List of function_entry */
356 exec_list function_signatures;
357 int next_signature_id;
358
359 /** List of glsl_to_tgsi_instruction */
360 exec_list instructions;
361
362 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op);
363
364 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
365 st_dst_reg dst, st_src_reg src0);
366
367 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
368 st_dst_reg dst, st_src_reg src0, st_src_reg src1);
369
370 glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
371 st_dst_reg dst,
372 st_src_reg src0, st_src_reg src1, st_src_reg src2);
373
374 unsigned get_opcode(ir_instruction *ir, unsigned op,
375 st_dst_reg dst,
376 st_src_reg src0, st_src_reg src1);
377
378 /**
379 * Emit the correct dot-product instruction for the type of arguments
380 */
381 glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir,
382 st_dst_reg dst,
383 st_src_reg src0,
384 st_src_reg src1,
385 unsigned elements);
386
387 void emit_scalar(ir_instruction *ir, unsigned op,
388 st_dst_reg dst, st_src_reg src0);
389
390 void emit_scalar(ir_instruction *ir, unsigned op,
391 st_dst_reg dst, st_src_reg src0, st_src_reg src1);
392
393 void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);
394
395 void emit_scs(ir_instruction *ir, unsigned op,
396 st_dst_reg dst, const st_src_reg &src);
397
398 bool try_emit_mad(ir_expression *ir,
399 int mul_operand);
400 bool try_emit_mad_for_and_not(ir_expression *ir,
401 int mul_operand);
402 bool try_emit_sat(ir_expression *ir);
403
404 void emit_swz(ir_expression *ir);
405
406 bool process_move_condition(ir_rvalue *ir);
407
408 void remove_output_reads(gl_register_file type);
409 void simplify_cmp(void);
410
411 void rename_temp_register(int index, int new_index);
412 int get_first_temp_read(int index);
413 int get_first_temp_write(int index);
414 int get_last_temp_read(int index);
415 int get_last_temp_write(int index);
416
417 void copy_propagate(void);
418 void eliminate_dead_code(void);
419 int eliminate_dead_code_advanced(void);
420 void merge_registers(void);
421 void renumber_registers(void);
422
423 void *mem_ctx;
424 };
425
426 static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
427
428 static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
429
430 static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT);
431
432 static void
433 fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
434
435 static void
436 fail_link(struct gl_shader_program *prog, const char *fmt, ...)
437 {
438 va_list args;
439 va_start(args, fmt);
440 ralloc_vasprintf_append(&prog->InfoLog, fmt, args);
441 va_end(args);
442
443 prog->LinkStatus = GL_FALSE;
444 }
445
446 static int
447 swizzle_for_size(int size)
448 {
449 int size_swizzles[4] = {
450 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
451 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
452 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
453 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
454 };
455
456 assert((size >= 1) && (size <= 4));
457 return size_swizzles[size - 1];
458 }
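/* For example (a sketch using the swizzle macros already used above):
 * swizzle_for_size(2) returns MAKE_SWIZZLE4(X, Y, Y, Y), so a vec2 source
 * replicates its last channel into the unused Z and W slots, while
 * swizzle_for_size(4) is simply SWIZZLE_XYZW and swizzle_for_size(1) is
 * equivalent to SWIZZLE_XXXX.
 */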
459
460 static bool
461 is_tex_instruction(unsigned opcode)
462 {
463 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
464 return info->is_tex;
465 }
466
467 static unsigned
468 num_inst_dst_regs(unsigned opcode)
469 {
470 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
471 return info->num_dst;
472 }
473
474 static unsigned
475 num_inst_src_regs(unsigned opcode)
476 {
477 const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
478 return info->is_tex ? info->num_src - 1 : info->num_src;
479 }
480
481 glsl_to_tgsi_instruction *
482 glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
483 st_dst_reg dst,
484 st_src_reg src0, st_src_reg src1, st_src_reg src2)
485 {
486 glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
487 int num_reladdr = 0, i;
488
489 op = get_opcode(ir, op, dst, src0, src1);
490
491 /* If we have to do relative addressing, we want to load the ARL
492 * reg directly for one of the regs, and preload the other reladdr
493 * sources into temps.
494 */
495 num_reladdr += dst.reladdr != NULL;
496 num_reladdr += src0.reladdr != NULL;
497 num_reladdr += src1.reladdr != NULL;
498 num_reladdr += src2.reladdr != NULL;
499
500 reladdr_to_temp(ir, &src2, &num_reladdr);
501 reladdr_to_temp(ir, &src1, &num_reladdr);
502 reladdr_to_temp(ir, &src0, &num_reladdr);
503
504 if (dst.reladdr) {
505 emit_arl(ir, address_reg, *dst.reladdr);
506 num_reladdr--;
507 }
508 assert(num_reladdr == 0);
509
510 inst->op = op;
511 inst->dst = dst;
512 inst->src[0] = src0;
513 inst->src[1] = src1;
514 inst->src[2] = src2;
515 inst->ir = ir;
516 inst->dead_mask = 0;
517
518 inst->function = NULL;
519
520 if (op == TGSI_OPCODE_ARL)
521 this->num_address_regs = 1;
522
523 /* Update indirect addressing status used by TGSI */
524 if (dst.reladdr) {
525 switch(dst.file) {
526 case PROGRAM_TEMPORARY:
527 this->indirect_addr_temps = true;
528 break;
529 case PROGRAM_LOCAL_PARAM:
530 case PROGRAM_ENV_PARAM:
531 case PROGRAM_STATE_VAR:
532 case PROGRAM_NAMED_PARAM:
533 case PROGRAM_CONSTANT:
534 case PROGRAM_UNIFORM:
535 this->indirect_addr_consts = true;
536 break;
537 case PROGRAM_IMMEDIATE:
538 assert(!"immediates should not have indirect addressing");
539 break;
540 default:
541 break;
542 }
543 }
544 else {
545 for (i=0; i<3; i++) {
546 if(inst->src[i].reladdr) {
547 switch(inst->src[i].file) {
548 case PROGRAM_TEMPORARY:
549 this->indirect_addr_temps = true;
550 break;
551 case PROGRAM_LOCAL_PARAM:
552 case PROGRAM_ENV_PARAM:
553 case PROGRAM_STATE_VAR:
554 case PROGRAM_NAMED_PARAM:
555 case PROGRAM_CONSTANT:
556 case PROGRAM_UNIFORM:
557 this->indirect_addr_consts = true;
558 break;
559 case PROGRAM_IMMEDIATE:
560 assert(!"immediates should not have indirect addressing");
561 break;
562 default:
563 break;
564 }
565 }
566 }
567 }
568
569 this->instructions.push_tail(inst);
570
571 return inst;
572 }
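/* Sketch of the relative-addressing handling above: if both src0 and src1
 * carry a reladdr, reladdr_to_temp() resolves src1 first (ARL its index,
 * then MOV the addressed value into a fresh temporary) and src0 last, so
 * that the final ARL result is still live in the single address register
 * when the instruction itself is emitted.
 */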
573
574
575 glsl_to_tgsi_instruction *
576 glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
577 st_dst_reg dst, st_src_reg src0, st_src_reg src1)
578 {
579 return emit(ir, op, dst, src0, src1, undef_src);
580 }
581
582 glsl_to_tgsi_instruction *
583 glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
584 st_dst_reg dst, st_src_reg src0)
585 {
586 assert(dst.writemask != 0);
587 return emit(ir, op, dst, src0, undef_src, undef_src);
588 }
589
590 glsl_to_tgsi_instruction *
591 glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op)
592 {
593 return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
594 }
595
596 /**
597 * Determines whether to use an integer, unsigned integer, or float opcode
598 * based on the operands and input opcode, then emits the result.
599 *
600 * TODO: type checking for remaining TGSI opcodes
601 */
602 unsigned
603 glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
604 st_dst_reg dst,
605 st_src_reg src0, st_src_reg src1)
606 {
607 int type = GLSL_TYPE_FLOAT;
608
609 if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
610 type = GLSL_TYPE_FLOAT;
611 else if (native_integers)
612 type = src0.type;
613
614 #define case4(c, f, i, u) \
615 case TGSI_OPCODE_##c: \
616 if (type == GLSL_TYPE_INT) op = TGSI_OPCODE_##i; \
617 else if (type == GLSL_TYPE_UINT) op = TGSI_OPCODE_##u; \
618 else op = TGSI_OPCODE_##f; \
619 break;
620 #define case3(f, i, u) case4(f, f, i, u)
621 #define case2fi(f, i) case4(f, f, i, i)
622 #define case2iu(i, u) case4(i, LAST, i, u)
623
624 switch(op) {
625 case2fi(ADD, UADD);
626 case2fi(MUL, UMUL);
627 case2fi(MAD, UMAD);
628 case3(DIV, IDIV, UDIV);
629 case3(MAX, IMAX, UMAX);
630 case3(MIN, IMIN, UMIN);
631 case2iu(MOD, UMOD);
632
633 case2fi(SEQ, USEQ);
634 case2fi(SNE, USNE);
635 case3(SGE, ISGE, USGE);
636 case3(SLT, ISLT, USLT);
637
638 case2iu(SHL, SHL);
639 case2iu(ISHR, USHR);
640 case2iu(NOT, NOT);
641 case2iu(AND, AND);
642 case2iu(OR, OR);
643 case2iu(XOR, XOR);
644
645 default: break;
646 }
647
648 assert(op != TGSI_OPCODE_LAST);
649 return op;
650 }
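/* Example of the selection above (assuming native_integers is set by the
 * driver/screen): with two GLSL_TYPE_INT sources, TGSI_OPCODE_ADD becomes
 * TGSI_OPCODE_UADD via case2fi(ADD, UADD) and TGSI_OPCODE_MIN becomes
 * TGSI_OPCODE_IMIN via case3(MIN, IMIN, UMIN); if either source is
 * GLSL_TYPE_FLOAT, the float opcode is kept unchanged.
 */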
651
652 glsl_to_tgsi_instruction *
653 glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
654 st_dst_reg dst, st_src_reg src0, st_src_reg src1,
655 unsigned elements)
656 {
657 static const unsigned dot_opcodes[] = {
658 TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
659 };
660
661 return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
662 }
663
664 /**
665 * Emits TGSI scalar opcodes to produce unique answers across channels.
666 *
667 * Some TGSI opcodes are scalar-only, like ARB_fp/vp. The src X
668 * channel determines the result across all channels. So to do a vec4
669 * of this operation, we want to emit a scalar per source channel used
670 * to produce dest channels.
671 */
672 void
673 glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
674 st_dst_reg dst,
675 st_src_reg orig_src0, st_src_reg orig_src1)
676 {
677 int i, j;
678 int done_mask = ~dst.writemask;
679
680 /* TGSI RCP is a scalar operation splatting results to all channels,
681 * like ARB_fp/vp. So emit as many RCPs as necessary to cover our
682 * dst channels.
683 */
684 for (i = 0; i < 4; i++) {
685 GLuint this_mask = (1 << i);
686 glsl_to_tgsi_instruction *inst;
687 st_src_reg src0 = orig_src0;
688 st_src_reg src1 = orig_src1;
689
690 if (done_mask & this_mask)
691 continue;
692
693 GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
694 GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
695 for (j = i + 1; j < 4; j++) {
696 /* If there is another enabled component in the destination that is
697 * derived from the same inputs, generate its value on this pass as
698 * well.
699 */
700 if (!(done_mask & (1 << j)) &&
701 GET_SWZ(src0.swizzle, j) == src0_swiz &&
702 GET_SWZ(src1.swizzle, j) == src1_swiz) {
703 this_mask |= (1 << j);
704 }
705 }
706 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
707 src0_swiz, src0_swiz);
708 src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
709 src1_swiz, src1_swiz);
710
711 inst = emit(ir, op, dst, src0, src1);
712 inst->dst.writemask = this_mask;
713 done_mask |= this_mask;
714 }
715 }
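/* Sketch of the channel coalescing above: with a dst writemask of XY,
 * a src0 swizzled .xxzw yields a single RCP (both dest channels read
 * src0.x), while .xyzw yields two RCPs, one feeding dst.x from src0.x
 * and one feeding dst.y from src0.y.
 */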
716
717 void
718 glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
719 st_dst_reg dst, st_src_reg src0)
720 {
721 st_src_reg undef = undef_src;
722
723 undef.swizzle = SWIZZLE_XXXX;
724
725 emit_scalar(ir, op, dst, src0, undef);
726 }
727
728 void
729 glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
730 st_dst_reg dst, st_src_reg src0)
731 {
732 st_src_reg tmp = get_temp(glsl_type::float_type);
733
734 if (src0.type == GLSL_TYPE_INT)
735 emit(NULL, TGSI_OPCODE_I2F, st_dst_reg(tmp), src0);
736 else if (src0.type == GLSL_TYPE_UINT)
737 emit(NULL, TGSI_OPCODE_U2F, st_dst_reg(tmp), src0);
738 else
739 tmp = src0;
740
741 emit(NULL, TGSI_OPCODE_ARL, dst, tmp);
742 }
743
744 /**
745 * Emit a TGSI_OPCODE_SCS instruction
746 *
747 * The \c SCS opcode functions a bit differently than the other TGSI opcodes.
748 * Instead of splatting its result across all four components of the
749 * destination, it writes one value to the \c x component and another value to
750 * the \c y component.
751 *
752 * \param ir IR instruction being processed
753 * \param op Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending
754 * on which value is desired.
755 * \param dst Destination register
756 * \param src Source register
757 */
758 void
759 glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op,
760 st_dst_reg dst,
761 const st_src_reg &src)
762 {
763 /* Vertex programs cannot use the SCS opcode.
764 */
765 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) {
766 emit_scalar(ir, op, dst, src);
767 return;
768 }
769
770 const unsigned component = (op == TGSI_OPCODE_SIN) ? 0 : 1;
771 const unsigned scs_mask = (1U << component);
772 int done_mask = ~dst.writemask;
773 st_src_reg tmp;
774
775 assert(op == TGSI_OPCODE_SIN || op == TGSI_OPCODE_COS);
776
777 * If there are components in the destination that differ from the component
778 * that will be written by the SCS instruction, we'll need a temporary.
779 */
780 if (scs_mask != unsigned(dst.writemask)) {
781 tmp = get_temp(glsl_type::vec4_type);
782 }
783
784 for (unsigned i = 0; i < 4; i++) {
785 unsigned this_mask = (1U << i);
786 st_src_reg src0 = src;
787
788 if ((done_mask & this_mask) != 0)
789 continue;
790
791 /* The source swizzle specifies which component of the source generates
792 * sine / cosine for the current component in the destination. The SCS
793 * instruction requires that this value be swizzled to the X component.
794 * Replace the current swizzle with a swizzle that puts the source in
795 * the X component.
796 */
797 unsigned src0_swiz = GET_SWZ(src.swizzle, i);
798
799 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
800 src0_swiz, src0_swiz);
801 for (unsigned j = i + 1; j < 4; j++) {
802 /* If there is another enabled component in the destination that is
803 * derived from the same inputs, generate its value on this pass as
804 * well.
805 */
806 if (!(done_mask & (1 << j)) &&
807 GET_SWZ(src0.swizzle, j) == src0_swiz) {
808 this_mask |= (1 << j);
809 }
810 }
811
812 if (this_mask != scs_mask) {
813 glsl_to_tgsi_instruction *inst;
814 st_dst_reg tmp_dst = st_dst_reg(tmp);
815
816 /* Emit the SCS instruction.
817 */
818 inst = emit(ir, TGSI_OPCODE_SCS, tmp_dst, src0);
819 inst->dst.writemask = scs_mask;
820
821 /* Move the result of the SCS instruction to the desired location in
822 * the destination.
823 */
824 tmp.swizzle = MAKE_SWIZZLE4(component, component,
825 component, component);
826 inst = emit(ir, TGSI_OPCODE_SCS, dst, tmp);
827 inst->dst.writemask = this_mask;
828 } else {
829 /* Emit the SCS instruction to write directly to the destination.
830 */
831 glsl_to_tgsi_instruction *inst = emit(ir, TGSI_OPCODE_SCS, dst, src0);
832 inst->dst.writemask = scs_mask;
833 }
834
835 done_mask |= this_mask;
836 }
837 }
838
839 int
840 glsl_to_tgsi_visitor::add_constant(gl_register_file file,
841 gl_constant_value values[4], int size, int datatype,
842 GLuint *swizzle_out)
843 {
844 if (file == PROGRAM_CONSTANT) {
845 return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values,
846 size, datatype, swizzle_out);
847 } else {
848 int index = 0;
849 immediate_storage *entry;
850 assert(file == PROGRAM_IMMEDIATE);
851
852 /* Search immediate storage to see if we already have an identical
853 * immediate that we can use instead of adding a duplicate entry.
854 */
855 foreach_iter(exec_list_iterator, iter, this->immediates) {
856 entry = (immediate_storage *)iter.get();
857
858 if (entry->size == size &&
859 entry->type == datatype &&
860 !memcmp(entry->values, values, size * sizeof(gl_constant_value))) {
861 return index;
862 }
863 index++;
864 }
865
866 /* Add this immediate to the list. */
867 entry = new(mem_ctx) immediate_storage(values, size, datatype);
868 this->immediates.push_tail(entry);
869 this->num_immediates++;
870 return index;
871 }
872 }
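/* Sketch: for PROGRAM_IMMEDIATE, two calls with an identical value, size
 * and datatype return the same index; only the first call appends an
 * immediate_storage entry, so duplicate literals in the shader share one
 * immediate slot.
 */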
873
874 st_src_reg
875 glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
876 {
877 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT);
878 union gl_constant_value uval;
879
880 uval.f = val;
881 src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle);
882
883 return src;
884 }
885
886 st_src_reg
887 glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
888 {
889 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
890 union gl_constant_value uval;
891
892 assert(native_integers);
893
894 uval.i = val;
895 src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle);
896
897 return src;
898 }
899
900 st_src_reg
901 glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
902 {
903 if (native_integers)
904 return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) :
905 st_src_reg_for_int(val);
906 else
907 return st_src_reg_for_float(val);
908 }
909
910 static int
911 type_size(const struct glsl_type *type)
912 {
913 unsigned int i;
914 int size;
915
916 switch (type->base_type) {
917 case GLSL_TYPE_UINT:
918 case GLSL_TYPE_INT:
919 case GLSL_TYPE_FLOAT:
920 case GLSL_TYPE_BOOL:
921 if (type->is_matrix()) {
922 return type->matrix_columns;
923 } else {
924 /* Regardless of size of vector, it gets a vec4. This is bad
925 * packing for things like floats, but otherwise arrays become a
926 * mess. Hopefully a later pass over the code can pack scalars
927 * down if appropriate.
928 */
929 return 1;
930 }
931 case GLSL_TYPE_ARRAY:
932 assert(type->length > 0);
933 return type_size(type->fields.array) * type->length;
934 case GLSL_TYPE_STRUCT:
935 size = 0;
936 for (i = 0; i < type->length; i++) {
937 size += type_size(type->fields.structure[i].type);
938 }
939 return size;
940 case GLSL_TYPE_SAMPLER:
941 /* Samplers take up one slot in UNIFORMS[], but they're baked in
942 * at link time.
943 */
944 return 1;
945 default:
946 assert(0);
947 return 0;
948 }
949 }
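/* A few concrete values for the packing rule above (sketch): a float or
 * vec3 occupies 1 slot, a mat4 occupies 4 (one per column), float[10]
 * occupies 10, and struct { vec3 p; float t; } occupies 2, since each
 * member rounds up to a full vec4 slot.
 */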
950
951 /**
952 * In the initial pass of codegen, we assign temporary numbers to
953 * intermediate results. (not SSA -- variable assignments will reuse
954 * storage).
955 */
956 st_src_reg
957 glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
958 {
959 st_src_reg src;
960
961 src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
962 src.file = PROGRAM_TEMPORARY;
963 src.index = next_temp;
964 src.reladdr = NULL;
965 next_temp += type_size(type);
966
967 if (type->is_array() || type->is_record()) {
968 src.swizzle = SWIZZLE_NOOP;
969 } else {
970 src.swizzle = swizzle_for_size(type->vector_elements);
971 }
972 src.negate = 0;
973
974 return src;
975 }
976
977 variable_storage *
978 glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
979 {
980
981 variable_storage *entry;
982
983 foreach_iter(exec_list_iterator, iter, this->variables) {
984 entry = (variable_storage *)iter.get();
985
986 if (entry->var == var)
987 return entry;
988 }
989
990 return NULL;
991 }
992
993 void
994 glsl_to_tgsi_visitor::visit(ir_variable *ir)
995 {
996 if (strcmp(ir->name, "gl_FragCoord") == 0) {
997 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
998
999 fp->OriginUpperLeft = ir->origin_upper_left;
1000 fp->PixelCenterInteger = ir->pixel_center_integer;
1001
1002 } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
1003 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
1004 switch (ir->depth_layout) {
1005 case ir_depth_layout_none:
1006 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE;
1007 break;
1008 case ir_depth_layout_any:
1009 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY;
1010 break;
1011 case ir_depth_layout_greater:
1012 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER;
1013 break;
1014 case ir_depth_layout_less:
1015 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS;
1016 break;
1017 case ir_depth_layout_unchanged:
1018 fp->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED;
1019 break;
1020 default:
1021 assert(0);
1022 break;
1023 }
1024 }
1025
1026 if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
1027 unsigned int i;
1028 const ir_state_slot *const slots = ir->state_slots;
1029 assert(ir->state_slots != NULL);
1030
1031 /* Check if this statevar's setup in the STATE file exactly
1032 * matches how we'll want to reference it as a
1033 * struct/array/whatever. If not, then we need to move it into
1034 * temporary storage and hope that it'll get copy-propagated
1035 * out.
1036 */
1037 for (i = 0; i < ir->num_state_slots; i++) {
1038 if (slots[i].swizzle != SWIZZLE_XYZW) {
1039 break;
1040 }
1041 }
1042
1043 variable_storage *storage;
1044 st_dst_reg dst;
1045 if (i == ir->num_state_slots) {
1046 /* We'll set the index later. */
1047 storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
1048 this->variables.push_tail(storage);
1049
1050 dst = undef_dst;
1051 } else {
1052 /* The variable_storage constructor allocates slots based on the size
1053 * of the type. However, this had better match the number of state
1054 * elements that we're going to copy into the new temporary.
1055 */
1056 assert((int) ir->num_state_slots == type_size(ir->type));
1057
1058 storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
1059 this->next_temp);
1060 this->variables.push_tail(storage);
1061 this->next_temp += type_size(ir->type);
1062
1063 dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index,
1064 native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT));
1065 }
1066
1067
1068 for (unsigned int i = 0; i < ir->num_state_slots; i++) {
1069 int index = _mesa_add_state_reference(this->prog->Parameters,
1070 (gl_state_index *)slots[i].tokens);
1071
1072 if (storage->file == PROGRAM_STATE_VAR) {
1073 if (storage->index == -1) {
1074 storage->index = index;
1075 } else {
1076 assert(index == storage->index + (int)i);
1077 }
1078 } else {
1079 st_src_reg src(PROGRAM_STATE_VAR, index,
1080 native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT);
1081 src.swizzle = slots[i].swizzle;
1082 emit(ir, TGSI_OPCODE_MOV, dst, src);
1083 /* even a float takes up a whole vec4 reg in a struct/array. */
1084 dst.index++;
1085 }
1086 }
1087
1088 if (storage->file == PROGRAM_TEMPORARY &&
1089 dst.index != storage->index + (int) ir->num_state_slots) {
1090 fail_link(this->shader_program,
1091 "failed to load builtin uniform `%s' (%d/%d regs loaded)\n",
1092 ir->name, dst.index - storage->index,
1093 type_size(ir->type));
1094 }
1095 }
1096 }
1097
1098 void
1099 glsl_to_tgsi_visitor::visit(ir_loop *ir)
1100 {
1101 ir_dereference_variable *counter = NULL;
1102
1103 if (ir->counter != NULL)
1104 counter = new(ir) ir_dereference_variable(ir->counter);
1105
1106 if (ir->from != NULL) {
1107 assert(ir->counter != NULL);
1108
1109 ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);
1110
1111 a->accept(this);
1112 delete a;
1113 }
1114
1115 emit(NULL, TGSI_OPCODE_BGNLOOP);
1116
1117 if (ir->to) {
1118 ir_expression *e =
1119 new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
1120 counter, ir->to);
1121 ir_if *if_stmt = new(ir) ir_if(e);
1122
1123 ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
1124
1125 if_stmt->then_instructions.push_tail(brk);
1126
1127 if_stmt->accept(this);
1128
1129 delete if_stmt;
1130 delete e;
1131 delete brk;
1132 }
1133
1134 visit_exec_list(&ir->body_instructions, this);
1135
1136 if (ir->increment) {
1137 ir_expression *e =
1138 new(ir) ir_expression(ir_binop_add, counter->type,
1139 counter, ir->increment);
1140
1141 ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);
1142
1143 a->accept(this);
1144 delete a;
1145 delete e;
1146 }
1147
1148 emit(NULL, TGSI_OPCODE_ENDLOOP);
1149 }
1150
1151 void
1152 glsl_to_tgsi_visitor::visit(ir_loop_jump *ir)
1153 {
1154 switch (ir->mode) {
1155 case ir_loop_jump::jump_break:
1156 emit(NULL, TGSI_OPCODE_BRK);
1157 break;
1158 case ir_loop_jump::jump_continue:
1159 emit(NULL, TGSI_OPCODE_CONT);
1160 break;
1161 }
1162 }
1163
1164
1165 void
1166 glsl_to_tgsi_visitor::visit(ir_function_signature *ir)
1167 {
1168 assert(0);
1169 (void)ir;
1170 }
1171
1172 void
1173 glsl_to_tgsi_visitor::visit(ir_function *ir)
1174 {
1175 /* Ignore function bodies other than main() -- we shouldn't see calls to
1176 * them since they should all be inlined before we get to glsl_to_tgsi.
1177 */
1178 if (strcmp(ir->name, "main") == 0) {
1179 const ir_function_signature *sig;
1180 exec_list empty;
1181
1182 sig = ir->matching_signature(&empty);
1183
1184 assert(sig);
1185
1186 foreach_iter(exec_list_iterator, iter, sig->body) {
1187 ir_instruction *ir = (ir_instruction *)iter.get();
1188
1189 ir->accept(this);
1190 }
1191 }
1192 }
1193
1194 bool
1195 glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
1196 {
1197 int nonmul_operand = 1 - mul_operand;
1198 st_src_reg a, b, c;
1199 st_dst_reg result_dst;
1200
1201 ir_expression *expr = ir->operands[mul_operand]->as_expression();
1202 if (!expr || expr->operation != ir_binop_mul)
1203 return false;
1204
1205 expr->operands[0]->accept(this);
1206 a = this->result;
1207 expr->operands[1]->accept(this);
1208 b = this->result;
1209 ir->operands[nonmul_operand]->accept(this);
1210 c = this->result;
1211
1212 this->result = get_temp(ir->type);
1213 result_dst = st_dst_reg(this->result);
1214 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1215 emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c);
1216
1217 return true;
1218 }
1219
1220 /**
1221 * Emit MAD(a, -b, a) instead of AND(a, NOT(b))
1222 *
1223 * The logic values are 1.0 for true and 0.0 for false. Logical-and is
1224 * implemented using multiplication, and logical-or is implemented using
1225 * addition. Logical-not can be implemented as (true - x), or (1.0 - x).
1226 * As a result, the logical expression (a & !b) can be rewritten as:
1227 *
1228 * - a * !b
1229 * - a * (1 - b)
1230 * - (a * 1) - (a * b)
1231 * - a + -(a * b)
1232 * - a + (a * -b)
1233 *
1234 * This final expression can be implemented as a single MAD(a, -b, a)
1235 * instruction.
1236 */
1237 bool
1238 glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
1239 {
1240 const int other_operand = 1 - try_operand;
1241 st_src_reg a, b;
1242
1243 ir_expression *expr = ir->operands[try_operand]->as_expression();
1244 if (!expr || expr->operation != ir_unop_logic_not)
1245 return false;
1246
1247 ir->operands[other_operand]->accept(this);
1248 a = this->result;
1249 expr->operands[0]->accept(this);
1250 b = this->result;
1251
1252 b.negate = ~b.negate;
1253
1254 this->result = get_temp(ir->type);
1255 emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a);
1256
1257 return true;
1258 }
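/* Quick check of the rewrite above with the 0.0/1.0 logic encoding:
 *   a=1, b=1:  MAD(1, -1, 1) = 1*(-1) + 1 = 0   == (a && !b)
 *   a=1, b=0:  MAD(1,  0, 1) = 1
 *   a=0, b=x:  MAD(0, -x, 0) = 0
 */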
1259
1260 bool
1261 glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
1262 {
1263 /* Saturates were only introduced to vertex programs in
1264 * NV_vertex_program3, so don't give them to drivers in the VP.
1265 */
1266 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB)
1267 return false;
1268
1269 ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
1270 if (!sat_src)
1271 return false;
1272
1273 sat_src->accept(this);
1274 st_src_reg src = this->result;
1275
1276 /* If we generated an expression instruction into a temporary in
1277 * processing the saturate's operand, apply the saturate to that
1278 * instruction. Otherwise, generate a MOV to do the saturate.
1279 *
1280 * Note that we have to be careful to only do this optimization if
1281 * the instruction in question was what generated src->result. For
1282 * example, ir_dereference_array might generate a MUL instruction
1283 * to create the reladdr, and return us a src reg using that
1284 * reladdr. That MUL result is not the value we're trying to
1285 * saturate.
1286 */
1287 ir_expression *sat_src_expr = sat_src->as_expression();
1288 if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul ||
1289 sat_src_expr->operation == ir_binop_add ||
1290 sat_src_expr->operation == ir_binop_dot)) {
1291 glsl_to_tgsi_instruction *new_inst;
1292 new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
1293 new_inst->saturate = true;
1294 } else {
1295 this->result = get_temp(ir->type);
1296 st_dst_reg result_dst = st_dst_reg(this->result);
1297 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1298 glsl_to_tgsi_instruction *inst;
1299 inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src);
1300 inst->saturate = true;
1301 }
1302
1303 return true;
1304 }
1305
1306 void
1307 glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
1308 st_src_reg *reg, int *num_reladdr)
1309 {
1310 if (!reg->reladdr)
1311 return;
1312
1313 emit_arl(ir, address_reg, *reg->reladdr);
1314
1315 if (*num_reladdr != 1) {
1316 st_src_reg temp = get_temp(glsl_type::vec4_type);
1317
1318 emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg);
1319 *reg = temp;
1320 }
1321
1322 (*num_reladdr)--;
1323 }
1324
1325 void
1326 glsl_to_tgsi_visitor::visit(ir_expression *ir)
1327 {
1328 unsigned int operand;
1329 st_src_reg op[Elements(ir->operands)];
1330 st_src_reg result_src;
1331 st_dst_reg result_dst;
1332
1333 /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c)
1334 */
1335 if (ir->operation == ir_binop_add) {
1336 if (try_emit_mad(ir, 1))
1337 return;
1338 if (try_emit_mad(ir, 0))
1339 return;
1340 }
1341
1342 /* Quick peephole: Emit MAD(a, -b, a) instead of AND(a, NOT(b))
1343 */
1344 if (ir->operation == ir_binop_logic_and) {
1345 if (try_emit_mad_for_and_not(ir, 1))
1346 return;
1347 if (try_emit_mad_for_and_not(ir, 0))
1348 return;
1349 }
1350
1351 if (try_emit_sat(ir))
1352 return;
1353
1354 if (ir->operation == ir_quadop_vector)
1355 assert(!"ir_quadop_vector should have been lowered");
1356
1357 for (operand = 0; operand < ir->get_num_operands(); operand++) {
1358 this->result.file = PROGRAM_UNDEFINED;
1359 ir->operands[operand]->accept(this);
1360 if (this->result.file == PROGRAM_UNDEFINED) {
1361 ir_print_visitor v;
1362 printf("Failed to get tree for expression operand:\n");
1363 ir->operands[operand]->accept(&v);
1364 exit(1);
1365 }
1366 op[operand] = this->result;
1367
1368 /* Matrix expression operands should have been broken down to vector
1369 * operations already.
1370 */
1371 assert(!ir->operands[operand]->type->is_matrix());
1372 }
1373
1374 int vector_elements = ir->operands[0]->type->vector_elements;
1375 if (ir->operands[1]) {
1376 vector_elements = MAX2(vector_elements,
1377 ir->operands[1]->type->vector_elements);
1378 }
1379
1380 this->result.file = PROGRAM_UNDEFINED;
1381
1382 /* Storage for our result. Ideally for an assignment we'd be using
1383 * the actual storage for the result here, instead.
1384 */
1385 result_src = get_temp(ir->type);
1386 /* convenience for the emit functions below. */
1387 result_dst = st_dst_reg(result_src);
1388 /* Limit writes to the channels that will be used by result_src later.
1389 * This does limit this temp's use as a temporary for multi-instruction
1390 * sequences.
1391 */
1392 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1393
1394 switch (ir->operation) {
1395 case ir_unop_logic_not:
1396 if (result_dst.type != GLSL_TYPE_FLOAT)
1397 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0));
1398 else {
1399 /* Previously 'SEQ dst, src, 0.0' was used for this. However, many
1400 * older GPUs implement SEQ using multiple instructions (i915 uses two
1401 * SGE instructions and a MUL instruction). Since our logic values are
1402 * 0.0 and 1.0, 1-x also implements !x.
1403 */
1404 op[0].negate = ~op[0].negate;
1405 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0));
1406 }
1407 break;
1408 case ir_unop_neg:
1409 assert(result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_INT);
1410 if (result_dst.type == GLSL_TYPE_INT)
1411 emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
1412 else {
1413 op[0].negate = ~op[0].negate;
1414 result_src = op[0];
1415 }
1416 break;
1417 case ir_unop_abs:
1418 assert(result_dst.type == GLSL_TYPE_FLOAT);
1419 emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
1420 break;
1421 case ir_unop_sign:
1422 emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]);
1423 break;
1424 case ir_unop_rcp:
1425 emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]);
1426 break;
1427
1428 case ir_unop_exp2:
1429 emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]);
1430 break;
1431 case ir_unop_exp:
1432 case ir_unop_log:
1433 assert(!"not reached: should be handled by ir_explog_to_explog2");
1434 break;
1435 case ir_unop_log2:
1436 emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]);
1437 break;
1438 case ir_unop_sin:
1439 emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
1440 break;
1441 case ir_unop_cos:
1442 emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]);
1443 break;
1444 case ir_unop_sin_reduced:
1445 emit_scs(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
1446 break;
1447 case ir_unop_cos_reduced:
1448 emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]);
1449 break;
1450
1451 case ir_unop_dFdx:
1452 emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]);
1453 break;
1454 case ir_unop_dFdy:
1455 op[0].negate = ~op[0].negate;
1456 emit(ir, TGSI_OPCODE_DDY, result_dst, op[0]);
1457 break;
1458
1459 case ir_unop_noise: {
1460 /* At some point, a motivated person could add a better
1461 * implementation of noise. Currently not even the nvidia
1462 * binary drivers do anything more than this. In any case, the
1463 * place to do this is in the GL state tracker, not the poor
1464 * driver.
1465 */
1466 emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5));
1467 break;
1468 }
1469
1470 case ir_binop_add:
1471 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
1472 break;
1473 case ir_binop_sub:
1474 emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]);
1475 break;
1476
1477 case ir_binop_mul:
1478 emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
1479 break;
1480 case ir_binop_div:
1481 if (result_dst.type == GLSL_TYPE_FLOAT)
1482 assert(!"not reached: should be handled by ir_div_to_mul_rcp");
1483 else
1484 emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
1485 break;
1486 case ir_binop_mod:
1487 if (result_dst.type == GLSL_TYPE_FLOAT)
1488 assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
1489 else
1490 emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]);
1491 break;
1492
1493 case ir_binop_less:
1494 emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
1495 break;
1496 case ir_binop_greater:
1497 emit(ir, TGSI_OPCODE_SGT, result_dst, op[0], op[1]);
1498 break;
1499 case ir_binop_lequal:
1500 emit(ir, TGSI_OPCODE_SLE, result_dst, op[0], op[1]);
1501 break;
1502 case ir_binop_gequal:
1503 emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
1504 break;
1505 case ir_binop_equal:
1506 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
1507 break;
1508 case ir_binop_nequal:
1509 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1510 break;
1511 case ir_binop_all_equal:
1512 /* "==" operator producing a scalar boolean. */
1513 if (ir->operands[0]->type->is_vector() ||
1514 ir->operands[1]->type->is_vector()) {
1515 st_src_reg temp = get_temp(native_integers ?
1516 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
1517 glsl_type::vec4_type);
1518 assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
1519 emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
1520
1521 /* After the dot-product, the value will be an integer on the
1522 * range [0,4]. Zero becomes 1.0, and positive values become zero.
1523 */
1524 emit_dp(ir, result_dst, temp, temp, vector_elements);
1525
1526 if (result_dst.type == GLSL_TYPE_FLOAT) {
1527 /* Negating the result of the dot-product gives values on the range
1528 * [-4, 0]. Zero becomes 1.0, and negative values become zero.
1529 * This is achieved using SGE.
1530 */
1531 st_src_reg sge_src = result_src;
1532 sge_src.negate = ~sge_src.negate;
1533 emit(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0));
1534 } else {
1535 /* The TGSI negate flag doesn't work for integers, so use SEQ 0
1536 * instead.
1537 */
1538 emit(ir, TGSI_OPCODE_SEQ, result_dst, result_src, st_src_reg_for_int(0));
1539 }
1540 } else {
1541 emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
1542 }
1543 break;
1544 case ir_binop_any_nequal:
1545 /* "!=" operator producing a scalar boolean. */
1546 if (ir->operands[0]->type->is_vector() ||
1547 ir->operands[1]->type->is_vector()) {
1548 st_src_reg temp = get_temp(native_integers ?
1549 glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
1550 glsl_type::vec4_type);
1551 assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT);
1552 emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
1553
1554 /* After the dot-product, the value will be an integer on the
1555 * range [0,4]. Zero stays zero, and positive values become 1.0.
1556 */
1557 glsl_to_tgsi_instruction *const dp =
1558 emit_dp(ir, result_dst, temp, temp, vector_elements);
1559 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
1560 result_dst.type == GLSL_TYPE_FLOAT) {
1561 /* The clamping to [0,1] can be done for free in the fragment
1562 * shader with a saturate.
1563 */
1564 dp->saturate = true;
1565 } else if (result_dst.type == GLSL_TYPE_FLOAT) {
1566 /* Negating the result of the dot-product gives values on the range
1567 * [-4, 0]. Zero stays zero, and negative values become 1.0. This is
1568 * achieved using SLT.
1569 */
1570 st_src_reg slt_src = result_src;
1571 slt_src.negate = ~slt_src.negate;
1572 emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
1573 } else {
1574 emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
1575 }
1576 } else {
1577 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1578 }
1579 break;
1580
1581 case ir_unop_any: {
1582 assert(ir->operands[0]->type->is_vector());
1583
1584 /* After the dot-product, the value will be an integer on the
1585 * range [0,4]. Zero stays zero, and positive values become 1.0.
1586 */
1587 glsl_to_tgsi_instruction *const dp =
1588 emit_dp(ir, result_dst, op[0], op[0],
1589 ir->operands[0]->type->vector_elements);
1590 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
1591 result_dst.type == GLSL_TYPE_FLOAT) {
1592 /* The clamping to [0,1] can be done for free in the fragment
1593 * shader with a saturate.
1594 */
1595 dp->saturate = true;
1596 } else if (result_dst.type == GLSL_TYPE_FLOAT) {
1597 /* Negating the result of the dot-product gives values on the range
1598 * [-4, 0]. Zero stays zero, and negative values become 1.0. This
1599 * is achieved using SLT.
1600 */
1601 st_src_reg slt_src = result_src;
1602 slt_src.negate = ~slt_src.negate;
1603 emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
1604 }
1605 else {
1606 /* Use SNE 0 if integers are being used as boolean values. */
1607 emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
1608 }
1609 break;
1610 }
1611
1612 case ir_binop_logic_xor:
1613 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1614 break;
1615
1616 case ir_binop_logic_or: {
1617 /* After the addition, the value will be an integer on the
1618 * range [0,2]. Zero stays zero, and positive values become 1.0.
1619 */
1620 glsl_to_tgsi_instruction *add =
1621 emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
1622 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
1623 result_dst.type == GLSL_TYPE_FLOAT) {
1624 /* The clamping to [0,1] can be done for free in the fragment
1625 * shader with a saturate if floats are being used as boolean values.
1626 */
1627 add->saturate = true;
1628 } else if (result_dst.type == GLSL_TYPE_FLOAT) {
1629 /* Negating the result of the addition gives values on the range
1630 * [-2, 0]. Zero stays zero, and negative values become 1.0. This
1631 * is achieved using SLT.
1632 */
1633 st_src_reg slt_src = result_src;
1634 slt_src.negate = ~slt_src.negate;
1635 emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
1636 } else {
1637 /* Use an SNE on the result of the addition. Zero stays zero,
1638 * 1 stays 1, and 2 becomes 1.
1639 */
1640 emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
1641 }
1642 break;
1643 }
1644
1645 case ir_binop_logic_and:
1646 /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
1647 emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
1648 break;
1649
1650 case ir_binop_dot:
1651 assert(ir->operands[0]->type->is_vector());
1652 assert(ir->operands[0]->type == ir->operands[1]->type);
1653 emit_dp(ir, result_dst, op[0], op[1],
1654 ir->operands[0]->type->vector_elements);
1655 break;
1656
1657 case ir_unop_sqrt:
1658 /* sqrt(x) = x * rsq(x). */
1659 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
1660 emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]);
1661 /* For incoming channels <= 0, set the result to 0. */
1662 op[0].negate = ~op[0].negate;
1663 emit(ir, TGSI_OPCODE_CMP, result_dst,
1664 op[0], result_src, st_src_reg_for_float(0.0));
1665 break;
1666 case ir_unop_rsq:
1667 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
1668 break;
1669 case ir_unop_i2f:
1670 case ir_unop_b2f:
1671 if (native_integers) {
1672 emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
1673 break;
1674 }
1675 case ir_unop_i2u:
1676 case ir_unop_u2i:
1677 /* Converting between signed and unsigned integers is a no-op. */
1678 case ir_unop_b2i:
1679 /* Booleans are stored as integers (or floats in GLSL 1.20 and lower). */
1680 result_src = op[0];
1681 break;
1682 case ir_unop_f2i:
1683 if (native_integers)
1684 emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
1685 else
1686 emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
1687 break;
1688 case ir_unop_f2b:
1689 case ir_unop_i2b:
1690 emit(ir, TGSI_OPCODE_SNE, result_dst, op[0],
1691 st_src_reg_for_type(result_dst.type, 0));
1692 break;
1693 case ir_unop_trunc:
1694 emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
1695 break;
1696 case ir_unop_ceil:
1697 op[0].negate = ~op[0].negate;
1698 emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
1699 result_src.negate = ~result_src.negate;
1700 break;
1701 case ir_unop_floor:
1702 emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
1703 break;
1704 case ir_unop_fract:
1705 emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]);
1706 break;
1707
1708 case ir_binop_min:
1709 emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]);
1710 break;
1711 case ir_binop_max:
1712 emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]);
1713 break;
1714 case ir_binop_pow:
1715 emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]);
1716 break;
1717
1718 case ir_unop_bit_not:
1719 if (glsl_version >= 130) {
1720 emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
1721 break;
1722 }
1723 case ir_unop_u2f:
1724 if (native_integers) {
1725 emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
1726 break;
1727 }
1728 case ir_binop_lshift:
1729 if (glsl_version >= 130) {
1730 emit(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]);
1731 break;
1732 }
1733 case ir_binop_rshift:
1734 if (glsl_version >= 130) {
1735 emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]);
1736 break;
1737 }
1738 case ir_binop_bit_and:
1739 if (glsl_version >= 130) {
1740 emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
1741 break;
1742 }
1743 case ir_binop_bit_xor:
1744 if (glsl_version >= 130) {
1745 emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
1746 break;
1747 }
1748 case ir_binop_bit_or:
1749 if (glsl_version >= 130) {
1750 emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
1751 break;
1752 }
1753 case ir_unop_round_even:
1754 assert(!"GLSL 1.30 features unsupported");
1755 break;
1756
1757 case ir_quadop_vector:
1758 /* This operation should have already been handled.
1759 */
1760 assert(!"Should not get here.");
1761 break;
1762 }
1763
1764 this->result = result_src;
1765 }
1766
1767
1768 void
1769 glsl_to_tgsi_visitor::visit(ir_swizzle *ir)
1770 {
1771 st_src_reg src;
1772 int i;
1773 int swizzle[4];
1774
1775 /* Note that this is only swizzles in expressions, not those on the left
1776 * hand side of an assignment, which do write masking. See ir_assignment
1777 * for that.
1778 */
1779
1780 ir->val->accept(this);
1781 src = this->result;
1782 assert(src.file != PROGRAM_UNDEFINED);
1783
1784 for (i = 0; i < 4; i++) {
1785 if (i < ir->type->vector_elements) {
1786 switch (i) {
1787 case 0:
1788 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
1789 break;
1790 case 1:
1791 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
1792 break;
1793 case 2:
1794 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
1795 break;
1796 case 3:
1797 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
1798 break;
1799 }
1800 } else {
1801 /* If the type is smaller than a vec4, replicate the last
1802 * channel out.
1803 */
1804 swizzle[i] = swizzle[ir->type->vector_elements - 1];
1805 }
1806 }
1807
1808 src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1809
1810 this->result = src;
1811 }
1812
1813 void
1814 glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
1815 {
1816 variable_storage *entry = find_variable_storage(ir->var);
1817 ir_variable *var = ir->var;
1818
1819 if (!entry) {
1820 switch (var->mode) {
1821 case ir_var_uniform:
1822 entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
1823 var->location);
1824 this->variables.push_tail(entry);
1825 break;
1826 case ir_var_in:
1827 case ir_var_inout:
1828 /* The linker assigns locations for varyings and attributes,
1829 * including deprecated builtins (like gl_Color), user-assigned
1830 * generic attributes (glBindAttribLocation), and
1831 * user-defined varyings.
1832 *
1833 * FINISHME: We would hit this path for function arguments. Fix!
1834 */
1835 assert(var->location != -1);
1836 entry = new(mem_ctx) variable_storage(var,
1837 PROGRAM_INPUT,
1838 var->location);
1839 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
1840 var->location >= VERT_ATTRIB_GENERIC0) {
1841 _mesa_add_attribute(this->prog->Attributes,
1842 var->name,
1843 _mesa_sizeof_glsl_type(var->type->gl_type),
1844 var->type->gl_type,
1845 var->location - VERT_ATTRIB_GENERIC0);
1846 }
1847 break;
1848 case ir_var_out:
1849 assert(var->location != -1);
1850 entry = new(mem_ctx) variable_storage(var,
1851 PROGRAM_OUTPUT,
1852 var->location);
1853 break;
1854 case ir_var_system_value:
1855 entry = new(mem_ctx) variable_storage(var,
1856 PROGRAM_SYSTEM_VALUE,
1857 var->location);
1858 break;
1859 case ir_var_auto:
1860 case ir_var_temporary:
1861 entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY,
1862 this->next_temp);
1863 this->variables.push_tail(entry);
1864
1865 next_temp += type_size(var->type);
1866 break;
1867 }
1868
1869 if (!entry) {
1870 printf("Failed to make storage for %s\n", var->name);
1871 exit(1);
1872 }
1873 }
1874
1875 this->result = st_src_reg(entry->file, entry->index, var->type);
1876 if (!native_integers)
1877 this->result.type = GLSL_TYPE_FLOAT;
1878 }
1879
1880 void
1881 glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
1882 {
1883 ir_constant *index;
1884 st_src_reg src;
1885 int element_size = type_size(ir->type);
1886
1887 index = ir->array_index->constant_expression_value();
1888
1889 ir->array->accept(this);
1890 src = this->result;
1891
1892 if (index) {
1893 src.index += index->value.i[0] * element_size;
1894 } else {
1895 /* Variable index array dereference. It eats the "vec4" of the
1896 * base of the array and an index that offsets the TGSI register
1897 * index.
1898 */
1899 ir->array_index->accept(this);
1900
1901 st_src_reg index_reg;
1902
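      /* Scale the computed index by the array element size (in vec4 slots) so
       * it can be used directly as a register offset; a stride of one needs no
       * multiply.
       */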
1903 if (element_size == 1) {
1904 index_reg = this->result;
1905 } else {
1906 index_reg = get_temp(glsl_type::float_type);
1907
1908 emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg),
1909 this->result, st_src_reg_for_float(element_size));
1910 }
1911
1912 /* If there was already a relative address register involved, add the
1913 * new and the old together to get the new offset.
1914 */
1915 if (src.reladdr != NULL) {
1916 st_src_reg accum_reg = get_temp(glsl_type::float_type);
1917
1918 emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg),
1919 index_reg, *src.reladdr);
1920
1921 index_reg = accum_reg;
1922 }
1923
1924 src.reladdr = ralloc(mem_ctx, st_src_reg);
1925 memcpy(src.reladdr, &index_reg, sizeof(index_reg));
1926 }
1927
1928 /* If the type is smaller than a vec4, replicate the last channel out. */
1929 if (ir->type->is_scalar() || ir->type->is_vector())
1930 src.swizzle = swizzle_for_size(ir->type->vector_elements);
1931 else
1932 src.swizzle = SWIZZLE_NOOP;
1933
1934 this->result = src;
1935 }
1936
1937 void
1938 glsl_to_tgsi_visitor::visit(ir_dereference_record *ir)
1939 {
1940 unsigned int i;
1941 const glsl_type *struct_type = ir->record->type;
1942 int offset = 0;
1943
1944 ir->record->accept(this);
1945
1946 for (i = 0; i < struct_type->length; i++) {
1947 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
1948 break;
1949 offset += type_size(struct_type->fields.structure[i].type);
1950 }
1951
1952 /* If the type is smaller than a vec4, replicate the last channel out. */
1953 if (ir->type->is_scalar() || ir->type->is_vector())
1954 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
1955 else
1956 this->result.swizzle = SWIZZLE_NOOP;
1957
1958 this->result.index += offset;
1959 }
1960
1961 /**
1962 * We want to be careful in assignment setup to hit the actual storage
1963 * instead of potentially using a temporary like we might with the
1964 * ir_dereference handler.
1965 */
1966 static st_dst_reg
1967 get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v)
1968 {
1969 /* The LHS must be a dereference. If the LHS is a variable indexed array
1970 * access of a vector, it must be separated into a series of conditional moves
1971 * before reaching this point (see ir_vec_index_to_cond_assign).
1972 */
1973 assert(ir->as_dereference());
1974 ir_dereference_array *deref_array = ir->as_dereference_array();
1975 if (deref_array) {
1976 assert(!deref_array->array->type->is_vector());
1977 }
1978
1979 /* Use the rvalue deref handler for the most part. We'll ignore
1980 * swizzles in it and write swizzles using writemask, though.
1981 */
1982 ir->accept(v);
1983 return st_dst_reg(v->result);
1984 }
1985
1986 /**
1987 * Process the condition of a conditional assignment
1988 *
1989 * Examines the condition of a conditional assignment to generate the optimal
1990 * first operand of a \c CMP instruction. If the condition is a relational
1991 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
1992 * used as the source for the \c CMP instruction. Otherwise the comparison
1993 * is processed to a boolean result, and the boolean result is used as the
1994 * operand to the CMP instruction.
1995 */
1996 bool
1997 glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir)
1998 {
1999 ir_rvalue *src_ir = ir;
2000 bool negate = true;
2001 bool switch_order = false;
2002
2003 ir_expression *const expr = ir->as_expression();
2004 if ((expr != NULL) && (expr->get_num_operands() == 2)) {
2005 bool zero_on_left = false;
2006
2007 if (expr->operands[0]->is_zero()) {
2008 src_ir = expr->operands[1];
2009 zero_on_left = true;
2010 } else if (expr->operands[1]->is_zero()) {
2011 src_ir = expr->operands[0];
2012 zero_on_left = false;
2013 }
2014
2015 /* a is - 0 + - 0 +
2016 * (a < 0) T F F ( a < 0) T F F
2017 * (0 < a) F F T (-a < 0) F F T
2018 * (a <= 0) T T F (-a < 0) F F T (swap order of other operands)
2019 * (0 <= a) F T T ( a < 0) T F F (swap order of other operands)
2020 * (a > 0) F F T (-a < 0) F F T
2021 * (0 > a) T F F ( a < 0) T F F
2022 * (a >= 0) F T T ( a < 0) T F F (swap order of other operands)
2023 * (0 >= a) T T F (-a < 0) F F T (swap order of other operands)
2024 *
2025 * Note that exchanging the order of 0 and 'a' in the comparison simply
2026 * means that the value of 'a' should be negated.
2027 */
2028 if (src_ir != ir) {
2029 switch (expr->operation) {
2030 case ir_binop_less:
2031 switch_order = false;
2032 negate = zero_on_left;
2033 break;
2034
2035 case ir_binop_greater:
2036 switch_order = false;
2037 negate = !zero_on_left;
2038 break;
2039
2040 case ir_binop_lequal:
2041 switch_order = true;
2042 negate = !zero_on_left;
2043 break;
2044
2045 case ir_binop_gequal:
2046 switch_order = true;
2047 negate = zero_on_left;
2048 break;
2049
2050 default:
2051 /* This isn't the right kind of comparison after all, so make sure
2052 * the whole condition is visited.
2053 */
2054 src_ir = ir;
2055 break;
2056 }
2057 }
2058 }
2059
2060 src_ir->accept(this);
2061
2062 /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
2063 * condition we produced is 0.0 or 1.0. By flipping the sign, we can
2064 * choose which value TGSI_OPCODE_CMP produces without an extra instruction
2065 * computing the condition.
2066 */
2067 if (negate)
2068 this->result.negate = ~this->result.negate;
2069
2070 return switch_order;
2071 }
2072
2073 void
2074 glsl_to_tgsi_visitor::visit(ir_assignment *ir)
2075 {
2076 st_dst_reg l;
2077 st_src_reg r;
2078 int i;
2079
2080 ir->rhs->accept(this);
2081 r = this->result;
2082
2083 l = get_assignment_lhs(ir->lhs, this);
2084
2085 /* FINISHME: This should really set to the correct maximal writemask for each
2086 * FINISHME: component written (in the loops below). This case can only
2087 * FINISHME: occur for matrices, arrays, and structures.
2088 */
2089 if (ir->write_mask == 0) {
2090 assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
2091 l.writemask = WRITEMASK_XYZW;
2092 } else if (ir->lhs->type->is_scalar() &&
2093 ir->lhs->variable_referenced()->mode == ir_var_out) {
2094 /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
2095 * FINISHME: W component of fragment shader output register zero, work correctly.
2096 */
2097 l.writemask = WRITEMASK_XYZW;
2098 } else {
2099 int swizzles[4];
2100 int first_enabled_chan = 0;
2101 int rhs_chan = 0;
2102
2103 l.writemask = ir->write_mask;
2104
2105 for (int i = 0; i < 4; i++) {
2106 if (l.writemask & (1 << i)) {
2107 first_enabled_chan = GET_SWZ(r.swizzle, i);
2108 break;
2109 }
2110 }
2111
2112 /* Swizzle a small RHS vector into the channels being written.
2113 *
2114 * glsl ir treats write_mask as dictating how many channels are
2115 * present on the RHS while TGSI treats write_mask as just
2116 * showing which channels of the vec4 RHS get written.
2117 */
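      /* For example, with an identity RHS swizzle and writemask .yz, RHS .x is
       * routed to dst .y and RHS .y to dst .z; channels outside the writemask
       * just repeat an already-used component, since they are never written.
       */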
2118 for (int i = 0; i < 4; i++) {
2119 if (l.writemask & (1 << i))
2120 swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
2121 else
2122 swizzles[i] = first_enabled_chan;
2123 }
2124 r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
2125 swizzles[2], swizzles[3]);
2126 }
2127
2128 assert(l.file != PROGRAM_UNDEFINED);
2129 assert(r.file != PROGRAM_UNDEFINED);
2130
2131 if (ir->condition) {
2132 const bool switch_order = this->process_move_condition(ir->condition);
2133 st_src_reg condition = this->result;
2134
2135 for (i = 0; i < type_size(ir->lhs->type); i++) {
2136 st_src_reg l_src = st_src_reg(l);
2137 l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements);
2138
2139 if (switch_order) {
2140 emit(ir, TGSI_OPCODE_CMP, l, condition, l_src, r);
2141 } else {
2142 emit(ir, TGSI_OPCODE_CMP, l, condition, r, l_src);
2143 }
2144
2145 l.index++;
2146 r.index++;
2147 }
2148 } else if (ir->rhs->as_expression() &&
2149 this->instructions.get_tail() &&
2150 ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir &&
2151 type_size(ir->lhs->type) == 1 &&
2152 l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst.writemask) {
2153 /* To avoid emitting an extra MOV when assigning an expression to a
2154 * variable, emit the last instruction of the expression again, but
2155 * replace the destination register with the target of the assignment.
2156 * Dead code elimination will remove the original instruction.
2157 */
2158 glsl_to_tgsi_instruction *inst, *new_inst;
2159 inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
2160 new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]);
2161 new_inst->saturate = inst->saturate;
2162 } else {
2163 for (i = 0; i < type_size(ir->lhs->type); i++) {
2164 emit(ir, TGSI_OPCODE_MOV, l, r);
2165 l.index++;
2166 r.index++;
2167 }
2168 }
2169 }
2170
2171
2172 void
2173 glsl_to_tgsi_visitor::visit(ir_constant *ir)
2174 {
2175 st_src_reg src;
2176 GLfloat stack_vals[4] = { 0 };
2177 gl_constant_value *values = (gl_constant_value *) stack_vals;
2178 GLenum gl_type = GL_NONE;
2179 unsigned int i;
2180 static int in_array = 0;
2181 gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE;
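   /* Array elements go to PROGRAM_CONSTANT rather than PROGRAM_IMMEDIATE,
    * presumably because array constants may be indexed with relative
    * addressing, which the immediate file does not support.
    */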
2182
2183 /* Unfortunately, 4 floats is all we can get into
2184 * _mesa_add_typed_unnamed_constant. So, make a temp to store an
2185 * aggregate constant and move each constant value into it. If we
2186 * get lucky, copy propagation will eliminate the extra moves.
2187 */
2188 if (ir->type->base_type == GLSL_TYPE_STRUCT) {
2189 st_src_reg temp_base = get_temp(ir->type);
2190 st_dst_reg temp = st_dst_reg(temp_base);
2191
2192 foreach_iter(exec_list_iterator, iter, ir->components) {
2193 ir_constant *field_value = (ir_constant *)iter.get();
2194 int size = type_size(field_value->type);
2195
2196 assert(size > 0);
2197
2198 field_value->accept(this);
2199 src = this->result;
2200
2201 for (i = 0; i < (unsigned int)size; i++) {
2202 emit(ir, TGSI_OPCODE_MOV, temp, src);
2203
2204 src.index++;
2205 temp.index++;
2206 }
2207 }
2208 this->result = temp_base;
2209 return;
2210 }
2211
2212 if (ir->type->is_array()) {
2213 st_src_reg temp_base = get_temp(ir->type);
2214 st_dst_reg temp = st_dst_reg(temp_base);
2215 int size = type_size(ir->type->fields.array);
2216
2217 assert(size > 0);
2218 in_array++;
2219
2220 for (i = 0; i < ir->type->length; i++) {
2221 ir->array_elements[i]->accept(this);
2222 src = this->result;
2223 for (int j = 0; j < size; j++) {
2224 emit(ir, TGSI_OPCODE_MOV, temp, src);
2225
2226 src.index++;
2227 temp.index++;
2228 }
2229 }
2230 this->result = temp_base;
2231 in_array--;
2232 return;
2233 }
2234
2235 if (ir->type->is_matrix()) {
2236 st_src_reg mat = get_temp(ir->type);
2237 st_dst_reg mat_column = st_dst_reg(mat);
2238
2239 for (i = 0; i < ir->type->matrix_columns; i++) {
2240 assert(ir->type->base_type == GLSL_TYPE_FLOAT);
2241 values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];
2242
2243 src = st_src_reg(file, -1, ir->type->base_type);
2244 src.index = add_constant(file,
2245 values,
2246 ir->type->vector_elements,
2247 GL_FLOAT,
2248 &src.swizzle);
2249 emit(ir, TGSI_OPCODE_MOV, mat_column, src);
2250
2251 mat_column.index++;
2252 }
2253
2254 this->result = mat;
2255 return;
2256 }
2257
2258 switch (ir->type->base_type) {
2259 case GLSL_TYPE_FLOAT:
2260 gl_type = GL_FLOAT;
2261 for (i = 0; i < ir->type->vector_elements; i++) {
2262 values[i].f = ir->value.f[i];
2263 }
2264 break;
2265 case GLSL_TYPE_UINT:
2266 gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT;
2267 for (i = 0; i < ir->type->vector_elements; i++) {
2268 if (native_integers)
2269 values[i].u = ir->value.u[i];
2270 else
2271 values[i].f = ir->value.u[i];
2272 }
2273 break;
2274 case GLSL_TYPE_INT:
2275 gl_type = native_integers ? GL_INT : GL_FLOAT;
2276 for (i = 0; i < ir->type->vector_elements; i++) {
2277 if (native_integers)
2278 values[i].i = ir->value.i[i];
2279 else
2280 values[i].f = ir->value.i[i];
2281 }
2282 break;
2283 case GLSL_TYPE_BOOL:
2284 gl_type = native_integers ? GL_BOOL : GL_FLOAT;
2285 for (i = 0; i < ir->type->vector_elements; i++) {
2286 if (native_integers)
2287 values[i].b = ir->value.b[i];
2288 else
2289 values[i].f = ir->value.b[i];
2290 }
2291 break;
2292 default:
2293 assert(!"Non-float/uint/int/bool constant");
2294 }
2295
2296 this->result = st_src_reg(file, -1, ir->type);
2297 this->result.index = add_constant(file,
2298 values,
2299 ir->type->vector_elements,
2300 gl_type,
2301 &this->result.swizzle);
2302 }
2303
2304 function_entry *
2305 glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig)
2306 {
2307 function_entry *entry;
2308
2309 foreach_iter(exec_list_iterator, iter, this->function_signatures) {
2310 entry = (function_entry *)iter.get();
2311
2312 if (entry->sig == sig)
2313 return entry;
2314 }
2315
2316 entry = ralloc(mem_ctx, function_entry);
2317 entry->sig = sig;
2318 entry->sig_id = this->next_signature_id++;
2319 entry->bgn_inst = NULL;
2320
2321 /* Allocate storage for all the parameters. */
2322 foreach_iter(exec_list_iterator, iter, sig->parameters) {
2323 ir_variable *param = (ir_variable *)iter.get();
2324 variable_storage *storage;
2325
2326 storage = find_variable_storage(param);
2327 assert(!storage);
2328
2329 storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY,
2330 this->next_temp);
2331 this->variables.push_tail(storage);
2332
2333 this->next_temp += type_size(param->type);
2334 }
2335
2336 if (!sig->return_type->is_void()) {
2337 entry->return_reg = get_temp(sig->return_type);
2338 } else {
2339 entry->return_reg = undef_src;
2340 }
2341
2342 this->function_signatures.push_tail(entry);
2343 return entry;
2344 }
2345
2346 void
2347 glsl_to_tgsi_visitor::visit(ir_call *ir)
2348 {
2349 glsl_to_tgsi_instruction *call_inst;
2350 ir_function_signature *sig = ir->get_callee();
2351 function_entry *entry = get_function_signature(sig);
2352 int i;
2353
2354 /* Process in parameters. */
2355 exec_list_iterator sig_iter = sig->parameters.iterator();
2356 foreach_iter(exec_list_iterator, iter, *ir) {
2357 ir_rvalue *param_rval = (ir_rvalue *)iter.get();
2358 ir_variable *param = (ir_variable *)sig_iter.get();
2359
2360 if (param->mode == ir_var_in ||
2361 param->mode == ir_var_inout) {
2362 variable_storage *storage = find_variable_storage(param);
2363 assert(storage);
2364
2365 param_rval->accept(this);
2366 st_src_reg r = this->result;
2367
2368 st_dst_reg l;
2369 l.file = storage->file;
2370 l.index = storage->index;
2371 l.reladdr = NULL;
2372 l.writemask = WRITEMASK_XYZW;
2373 l.cond_mask = COND_TR;
2374
2375 for (i = 0; i < type_size(param->type); i++) {
2376 emit(ir, TGSI_OPCODE_MOV, l, r);
2377 l.index++;
2378 r.index++;
2379 }
2380 }
2381
2382 sig_iter.next();
2383 }
2384 assert(!sig_iter.has_next());
2385
2386 /* Emit call instruction */
2387 call_inst = emit(ir, TGSI_OPCODE_CAL);
2388 call_inst->function = entry;
2389
2390 /* Process out parameters. */
2391 sig_iter = sig->parameters.iterator();
2392 foreach_iter(exec_list_iterator, iter, *ir) {
2393 ir_rvalue *param_rval = (ir_rvalue *)iter.get();
2394 ir_variable *param = (ir_variable *)sig_iter.get();
2395
2396 if (param->mode == ir_var_out ||
2397 param->mode == ir_var_inout) {
2398 variable_storage *storage = find_variable_storage(param);
2399 assert(storage);
2400
2401 st_src_reg r;
2402 r.file = storage->file;
2403 r.index = storage->index;
2404 r.reladdr = NULL;
2405 r.swizzle = SWIZZLE_NOOP;
2406 r.negate = 0;
2407
2408 param_rval->accept(this);
2409 st_dst_reg l = st_dst_reg(this->result);
2410
2411 for (i = 0; i < type_size(param->type); i++) {
2412 emit(ir, TGSI_OPCODE_MOV, l, r);
2413 l.index++;
2414 r.index++;
2415 }
2416 }
2417
2418 sig_iter.next();
2419 }
2420 assert(!sig_iter.has_next());
2421
2422 /* Process return value. */
2423 this->result = entry->return_reg;
2424 }
2425
2426 void
2427 glsl_to_tgsi_visitor::visit(ir_texture *ir)
2428 {
2429 st_src_reg result_src, coord, lod_info, projector, dx, dy, offset;
2430 st_dst_reg result_dst, coord_dst;
2431 glsl_to_tgsi_instruction *inst = NULL;
2432 unsigned opcode = TGSI_OPCODE_NOP;
2433
2434 if (ir->coordinate) {
2435 ir->coordinate->accept(this);
2436
2437 /* Put our coords in a temp. We'll need to modify them for shadow,
2438 * projection, or LOD, so the only case we'd use it as-is is if
2439 * we're doing plain old texturing. The optimization passes on
2440 * glsl_to_tgsi_visitor should handle cleaning up our mess in that case.
2441 */
2442 coord = get_temp(glsl_type::vec4_type);
2443 coord_dst = st_dst_reg(coord);
2444 emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
2445 }
2446
2447 if (ir->projector) {
2448 ir->projector->accept(this);
2449 projector = this->result;
2450 }
2451
2452 /* Storage for our result. Ideally for an assignment we'd be using
2453 * the actual storage for the result here, instead.
2454 */
2455 result_src = get_temp(glsl_type::vec4_type);
2456 result_dst = st_dst_reg(result_src);
2457
2458 switch (ir->op) {
2459 case ir_tex:
2460 opcode = TGSI_OPCODE_TEX;
2461 break;
2462 case ir_txb:
2463 opcode = TGSI_OPCODE_TXB;
2464 ir->lod_info.bias->accept(this);
2465 lod_info = this->result;
2466 break;
2467 case ir_txl:
2468 opcode = TGSI_OPCODE_TXL;
2469 ir->lod_info.lod->accept(this);
2470 lod_info = this->result;
2471 break;
2472 case ir_txd:
2473 opcode = TGSI_OPCODE_TXD;
2474 ir->lod_info.grad.dPdx->accept(this);
2475 dx = this->result;
2476 ir->lod_info.grad.dPdy->accept(this);
2477 dy = this->result;
2478 break;
2479 case ir_txs:
2480 opcode = TGSI_OPCODE_TXQ;
2481 ir->lod_info.lod->accept(this);
2482 lod_info = this->result;
2483 break;
2484 case ir_txf:
2485 opcode = TGSI_OPCODE_TXF;
2486 ir->lod_info.lod->accept(this);
2487 lod_info = this->result;
2488 if (ir->offset) {
2489 ir->offset->accept(this);
2490 offset = this->result;
2491 }
2492 break;
2493 }
2494
2495 if (ir->projector) {
2496 if (opcode == TGSI_OPCODE_TEX) {
2497 /* Slot the projector in as the last component of the coord. */
2498 coord_dst.writemask = WRITEMASK_W;
2499 emit(ir, TGSI_OPCODE_MOV, coord_dst, projector);
2500 coord_dst.writemask = WRITEMASK_XYZW;
2501 opcode = TGSI_OPCODE_TXP;
2502 } else {
2503 st_src_reg coord_w = coord;
2504 coord_w.swizzle = SWIZZLE_WWWW;
2505
2506 /* For the other TEX opcodes there's no projective version
2507 * since the last slot is taken up by LOD info. Do the
2508 * projective divide now.
2509 */
2510 coord_dst.writemask = WRITEMASK_W;
2511 emit(ir, TGSI_OPCODE_RCP, coord_dst, projector);
2512
2513 /* In the case where we have to project the coordinates "by hand,"
2514 * the shadow comparator value must also be projected.
2515 */
2516 st_src_reg tmp_src = coord;
2517 if (ir->shadow_comparitor) {
2518 /* Slot the shadow value in as the second to last component of the
2519 * coord.
2520 */
2521 ir->shadow_comparitor->accept(this);
2522
2523 tmp_src = get_temp(glsl_type::vec4_type);
2524 st_dst_reg tmp_dst = st_dst_reg(tmp_src);
2525
2526 tmp_dst.writemask = WRITEMASK_Z;
2527 emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result);
2528
2529 tmp_dst.writemask = WRITEMASK_XY;
2530 emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord);
2531 }
2532
2533 coord_dst.writemask = WRITEMASK_XYZ;
2534 emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w);
2535
2536 coord_dst.writemask = WRITEMASK_XYZW;
2537 coord.swizzle = SWIZZLE_XYZW;
2538 }
2539 }
2540
2541 /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow
2542 * comparator was put in the correct place (and projected) by the
2543 * by-hand projection code above.
2544 */
2545 if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) {
2546 /* Slot the shadow value in as the second to last component of the
2547 * coord.
2548 */
2549 ir->shadow_comparitor->accept(this);
2550 coord_dst.writemask = WRITEMASK_Z;
2551 emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
2552 coord_dst.writemask = WRITEMASK_XYZW;
2553 }
2554
2555 if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB ||
2556 opcode == TGSI_OPCODE_TXF) {
2557 /* TGSI stores LOD or LOD bias in the last channel of the coords. */
2558 coord_dst.writemask = WRITEMASK_W;
2559 emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info);
2560 coord_dst.writemask = WRITEMASK_XYZW;
2561 }
2562
2563 if (opcode == TGSI_OPCODE_TXD)
2564 inst = emit(ir, opcode, result_dst, coord, dx, dy);
2565 else if (opcode == TGSI_OPCODE_TXQ)
2566 inst = emit(ir, opcode, result_dst, lod_info);
2567 else if (opcode == TGSI_OPCODE_TXF) {
2568 inst = emit(ir, opcode, result_dst, coord);
2569 } else
2570 inst = emit(ir, opcode, result_dst, coord);
2571
2572 if (ir->shadow_comparitor)
2573 inst->tex_shadow = GL_TRUE;
2574
2575 inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler,
2576 this->shader_program,
2577 this->prog);
2578
2579 if (ir->offset) {
2580 inst->tex_offset_num_offset = 1;
2581 inst->tex_offsets[0].Index = offset.index;
2582 inst->tex_offsets[0].File = offset.file;
2583 inst->tex_offsets[0].SwizzleX = GET_SWZ(offset.swizzle, 0);
2584 inst->tex_offsets[0].SwizzleY = GET_SWZ(offset.swizzle, 1);
2585 inst->tex_offsets[0].SwizzleZ = GET_SWZ(offset.swizzle, 2);
2586 }
2587
2588 const glsl_type *sampler_type = ir->sampler->type;
2589
2590 switch (sampler_type->sampler_dimensionality) {
2591 case GLSL_SAMPLER_DIM_1D:
2592 inst->tex_target = (sampler_type->sampler_array)
2593 ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
2594 break;
2595 case GLSL_SAMPLER_DIM_2D:
2596 inst->tex_target = (sampler_type->sampler_array)
2597 ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
2598 break;
2599 case GLSL_SAMPLER_DIM_3D:
2600 inst->tex_target = TEXTURE_3D_INDEX;
2601 break;
2602 case GLSL_SAMPLER_DIM_CUBE:
2603 inst->tex_target = TEXTURE_CUBE_INDEX;
2604 break;
2605 case GLSL_SAMPLER_DIM_RECT:
2606 inst->tex_target = TEXTURE_RECT_INDEX;
2607 break;
2608 case GLSL_SAMPLER_DIM_BUF:
2609 assert(!"FINISHME: Implement ARB_texture_buffer_object");
2610 break;
2611 default:
2612 assert(!"Should not get here.");
2613 }
2614
2615 this->result = result_src;
2616 }
2617
2618 void
2619 glsl_to_tgsi_visitor::visit(ir_return *ir)
2620 {
2621 if (ir->get_value()) {
2622 st_dst_reg l;
2623 int i;
2624
2625 assert(current_function);
2626
2627 ir->get_value()->accept(this);
2628 st_src_reg r = this->result;
2629
2630 l = st_dst_reg(current_function->return_reg);
2631
2632 for (i = 0; i < type_size(current_function->sig->return_type); i++) {
2633 emit(ir, TGSI_OPCODE_MOV, l, r);
2634 l.index++;
2635 r.index++;
2636 }
2637 }
2638
2639 emit(ir, TGSI_OPCODE_RET);
2640 }
2641
2642 void
2643 glsl_to_tgsi_visitor::visit(ir_discard *ir)
2644 {
2645 struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
2646
2647 if (ir->condition) {
2648 ir->condition->accept(this);
2649 this->result.negate = ~this->result.negate;
2650 emit(ir, TGSI_OPCODE_KIL, undef_dst, this->result);
2651 } else {
2652 emit(ir, TGSI_OPCODE_KILP);
2653 }
2654
2655 fp->UsesKill = GL_TRUE;
2656 }
2657
2658 void
2659 glsl_to_tgsi_visitor::visit(ir_if *ir)
2660 {
2661 glsl_to_tgsi_instruction *cond_inst, *if_inst;
2662 glsl_to_tgsi_instruction *prev_inst;
2663
2664 prev_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
2665
2666 ir->condition->accept(this);
2667 assert(this->result.file != PROGRAM_UNDEFINED);
2668
2669 if (this->options->EmitCondCodes) {
2670 cond_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
2671
2672 /* See if we actually generated any instruction computing
2673 * the condition. If not, then cook up a move to a temp so we
2674 * have something to set cond_update on.
2675 */
2676 if (cond_inst == prev_inst) {
2677 st_src_reg temp = get_temp(glsl_type::bool_type);
2678 cond_inst = emit(ir->condition, TGSI_OPCODE_MOV, st_dst_reg(temp), result);
2679 }
2680 cond_inst->cond_update = GL_TRUE;
2681
2682 if_inst = emit(ir->condition, TGSI_OPCODE_IF);
2683 if_inst->dst.cond_mask = COND_NE;
2684 } else {
2685 if_inst = emit(ir->condition, TGSI_OPCODE_IF, undef_dst, this->result);
2686 }
2687
2688 this->instructions.push_tail(if_inst);
2689
2690 visit_exec_list(&ir->then_instructions, this);
2691
2692 if (!ir->else_instructions.is_empty()) {
2693 emit(ir->condition, TGSI_OPCODE_ELSE);
2694 visit_exec_list(&ir->else_instructions, this);
2695 }
2696
2697 if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF);
2698 }
2699
2700 glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
2701 {
2702 result.file = PROGRAM_UNDEFINED;
2703 next_temp = 1;
2704 next_signature_id = 1;
2705 num_immediates = 0;
2706 current_function = NULL;
2707 num_address_regs = 0;
2708 indirect_addr_temps = false;
2709 indirect_addr_consts = false;
2710 mem_ctx = ralloc_context(NULL);
2711 }
2712
2713 glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
2714 {
2715 ralloc_free(mem_ctx);
2716 }
2717
2718 extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v)
2719 {
2720 delete v;
2721 }
2722
2723
2724 /**
2725 * Count resources used by the given gpu program (number of texture
2726 * samplers, etc).
2727 */
2728 static void
2729 count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
2730 {
2731 v->samplers_used = 0;
2732
2733 foreach_iter(exec_list_iterator, iter, v->instructions) {
2734 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
2735
2736 if (is_tex_instruction(inst->op)) {
2737 v->samplers_used |= 1 << inst->sampler;
2738
2739 prog->SamplerTargets[inst->sampler] =
2740 (gl_texture_index)inst->tex_target;
2741 if (inst->tex_shadow) {
2742 prog->ShadowSamplers |= 1 << inst->sampler;
2743 }
2744 }
2745 }
2746
2747 prog->SamplersUsed = v->samplers_used;
2748 _mesa_update_shader_textures_used(prog);
2749 }
2750
2751
2752 /**
2753 * Check if the given vertex/fragment/shader program is within the
2754 * resource limits of the context (number of texture units, etc).
2755 * If any of those checks fail, record a linker error.
2756 *
2757 * XXX more checks are needed...
2758 */
2759 static void
2760 check_resources(const struct gl_context *ctx,
2761 struct gl_shader_program *shader_program,
2762 glsl_to_tgsi_visitor *prog,
2763 struct gl_program *proginfo)
2764 {
2765 switch (proginfo->Target) {
2766 case GL_VERTEX_PROGRAM_ARB:
2767 if (_mesa_bitcount(prog->samplers_used) >
2768 ctx->Const.MaxVertexTextureImageUnits) {
2769 fail_link(shader_program, "Too many vertex shader texture samplers");
2770 }
2771 if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) {
2772 fail_link(shader_program, "Too many vertex shader constants");
2773 }
2774 break;
2775 case MESA_GEOMETRY_PROGRAM:
2776 if (_mesa_bitcount(prog->samplers_used) >
2777 ctx->Const.MaxGeometryTextureImageUnits) {
2778 fail_link(shader_program, "Too many geometry shader texture samplers");
2779 }
2780 if (proginfo->Parameters->NumParameters >
2781 MAX_GEOMETRY_UNIFORM_COMPONENTS / 4) {
2782 fail_link(shader_program, "Too many geometry shader constants");
2783 }
2784 break;
2785 case GL_FRAGMENT_PROGRAM_ARB:
2786 if (_mesa_bitcount(prog->samplers_used) >
2787 ctx->Const.MaxTextureImageUnits) {
2788 fail_link(shader_program, "Too many fragment shader texture samplers");
2789 }
2790 if (proginfo->Parameters->NumParameters > MAX_UNIFORMS) {
2791 fail_link(shader_program, "Too many fragment shader constants");
2792 }
2793 break;
2794 default:
2795 _mesa_problem(ctx, "unexpected program type in check_resources()");
2796 }
2797 }
2798
2799
2800
2801 struct uniform_sort {
2802 struct gl_uniform *u;
2803 int pos;
2804 };
2805
2806 /* The shader_program->Uniforms list is almost sorted in increasing
2807 * uniform->{Frag,Vert}Pos locations, but not quite when there are
2808 * uniforms shared between targets. We need to add parameters in
2809 * increasing order for the targets.
2810 */
2811 static int
2812 sort_uniforms(const void *a, const void *b)
2813 {
2814 struct uniform_sort *u1 = (struct uniform_sort *)a;
2815 struct uniform_sort *u2 = (struct uniform_sort *)b;
2816
2817 return u1->pos - u2->pos;
2818 }
2819
2820 /* Add the uniforms to the parameters. The linker chose locations
2821 * in our parameters lists (which weren't created yet), which the
2822 * uniforms code will use to poke values into our parameters list
2823 * when uniforms are updated.
2824 */
2825 static void
2826 add_uniforms_to_parameters_list(struct gl_shader_program *shader_program,
2827 struct gl_shader *shader,
2828 struct gl_program *prog)
2829 {
2830 unsigned int i;
2831 unsigned int next_sampler = 0, num_uniforms = 0;
2832 struct uniform_sort *sorted_uniforms;
2833
2834 sorted_uniforms = ralloc_array(NULL, struct uniform_sort,
2835 shader_program->Uniforms->NumUniforms);
2836
2837 for (i = 0; i < shader_program->Uniforms->NumUniforms; i++) {
2838 struct gl_uniform *uniform = shader_program->Uniforms->Uniforms + i;
2839 int parameter_index = -1;
2840
2841 switch (shader->Type) {
2842 case GL_VERTEX_SHADER:
2843 parameter_index = uniform->VertPos;
2844 break;
2845 case GL_FRAGMENT_SHADER:
2846 parameter_index = uniform->FragPos;
2847 break;
2848 case GL_GEOMETRY_SHADER:
2849 parameter_index = uniform->GeomPos;
2850 break;
2851 }
2852
2853 /* Only add uniforms used in our target. */
2854 if (parameter_index != -1) {
2855 sorted_uniforms[num_uniforms].pos = parameter_index;
2856 sorted_uniforms[num_uniforms].u = uniform;
2857 num_uniforms++;
2858 }
2859 }
2860
2861 qsort(sorted_uniforms, num_uniforms, sizeof(struct uniform_sort),
2862 sort_uniforms);
2863
2864 for (i = 0; i < num_uniforms; i++) {
2865 struct gl_uniform *uniform = sorted_uniforms[i].u;
2866 int parameter_index = sorted_uniforms[i].pos;
2867 const glsl_type *type = uniform->Type;
2868 unsigned int size;
2869
2870 if (type->is_vector() ||
2871 type->is_scalar()) {
2872 size = type->vector_elements;
2873 } else {
2874 size = type_size(type) * 4;
2875 }
2876
2877 gl_register_file file;
2878 if (type->is_sampler() ||
2879 (type->is_array() && type->fields.array->is_sampler())) {
2880 file = PROGRAM_SAMPLER;
2881 } else {
2882 file = PROGRAM_UNIFORM;
2883 }
2884
2885 GLint index = _mesa_lookup_parameter_index(prog->Parameters, -1,
2886 uniform->Name);
2887
2888 if (index < 0) {
2889 index = _mesa_add_parameter(prog->Parameters, file,
2890 uniform->Name, size, type->gl_type,
2891 NULL, NULL, 0x0);
2892
2893 /* Sampler uniform values are stored in prog->SamplerUnits,
2894 * and the entry in that array is selected by this index we
2895 * store in ParameterValues[].
2896 */
2897 if (file == PROGRAM_SAMPLER) {
2898 for (unsigned int j = 0; j < size / 4; j++)
2899 prog->Parameters->ParameterValues[index + j][0].f = next_sampler++;
2900 }
2901
2902 /* The location chosen in the Parameters list here (returned
2903 * from _mesa_add_parameter) has to match what the linker chose.
2904 */
2905 if (index != parameter_index) {
2906 fail_link(shader_program, "Allocation of uniform `%s' to target "
2907 "failed (%d vs %d)\n",
2908 uniform->Name, index, parameter_index);
2909 }
2910 }
2911 }
2912
2913 ralloc_free(sorted_uniforms);
2914 }
2915
2916 static void
2917 set_uniform_initializer(struct gl_context *ctx, void *mem_ctx,
2918 struct gl_shader_program *shader_program,
2919 const char *name, const glsl_type *type,
2920 ir_constant *val)
2921 {
2922 if (type->is_record()) {
2923 ir_constant *field_constant;
2924
2925 field_constant = (ir_constant *)val->components.get_head();
2926
2927 for (unsigned int i = 0; i < type->length; i++) {
2928 const glsl_type *field_type = type->fields.structure[i].type;
2929 const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name,
2930 type->fields.structure[i].name);
2931 set_uniform_initializer(ctx, mem_ctx, shader_program, field_name,
2932 field_type, field_constant);
2933 field_constant = (ir_constant *)field_constant->next;
2934 }
2935 return;
2936 }
2937
2938 int loc = _mesa_get_uniform_location(ctx, shader_program, name);
2939
2940 if (loc == -1) {
2941 fail_link(shader_program,
2942 "Couldn't find uniform for initializer %s\n", name);
2943 return;
2944 }
2945
2946 for (unsigned int i = 0; i < (type->is_array() ? type->length : 1); i++) {
2947 ir_constant *element;
2948 const glsl_type *element_type;
2949 if (type->is_array()) {
2950 element = val->array_elements[i];
2951 element_type = type->fields.array;
2952 } else {
2953 element = val;
2954 element_type = type;
2955 }
2956
2957 void *values;
2958
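      /* Booleans are expanded into a temporary int array so that the
       * _mesa_uniform() call below receives plain int data with a matching
       * GL_INT-based type.
       */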
2959 if (element_type->base_type == GLSL_TYPE_BOOL) {
2960 int *conv = ralloc_array(mem_ctx, int, element_type->components());
2961 for (unsigned int j = 0; j < element_type->components(); j++) {
2962 conv[j] = element->value.b[j];
2963 }
2964 values = (void *)conv;
2965 element_type = glsl_type::get_instance(GLSL_TYPE_INT,
2966 element_type->vector_elements,
2967 1);
2968 } else {
2969 values = &element->value;
2970 }
2971
2972 if (element_type->is_matrix()) {
2973 _mesa_uniform_matrix(ctx, shader_program,
2974 element_type->matrix_columns,
2975 element_type->vector_elements,
2976 loc, 1, GL_FALSE, (GLfloat *)values);
2977 loc += element_type->matrix_columns;
2978 } else {
2979 _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns,
2980 values, element_type->gl_type);
2981 loc += type_size(element_type);
2982 }
2983 }
2984 }
2985
2986 /*
2987 * Scan/rewrite program to remove reads of custom (output) registers.
2988 * The passed type has to be either PROGRAM_OUTPUT or PROGRAM_VARYING
2989 * (for vertex shaders).
2990 * In GLSL shaders, varying vars can be read and written.
2991 * On some hardware, trying to read an output register causes trouble.
2992 * So, rewrite the program to use a temporary register in this case.
2993 *
2994 * Based on _mesa_remove_output_reads from programopt.c.
2995 */
2996 void
2997 glsl_to_tgsi_visitor::remove_output_reads(gl_register_file type)
2998 {
2999 GLuint i;
3000 GLint outputMap[VERT_RESULT_MAX];
3001 GLint outputTypes[VERT_RESULT_MAX];
3002 GLuint numVaryingReads = 0;
3003 GLboolean usedTemps[MAX_TEMPS];
3004 GLuint firstTemp = 0;
3005
3006 _mesa_find_used_registers(prog, PROGRAM_TEMPORARY,
3007 usedTemps, MAX_TEMPS);
3008
3009 assert(type == PROGRAM_VARYING || type == PROGRAM_OUTPUT);
3010 assert(prog->Target == GL_VERTEX_PROGRAM_ARB || type != PROGRAM_VARYING);
3011
3012 for (i = 0; i < VERT_RESULT_MAX; i++)
3013 outputMap[i] = -1;
3014
3015 /* look for instructions which read from varying vars */
3016 foreach_iter(exec_list_iterator, iter, this->instructions) {
3017 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3018 const GLuint numSrc = num_inst_src_regs(inst->op);
3019 GLuint j;
3020 for (j = 0; j < numSrc; j++) {
3021 if (inst->src[j].file == type) {
3022 /* replace the read with a temp reg */
3023 const GLuint var = inst->src[j].index;
3024 if (outputMap[var] == -1) {
3025 numVaryingReads++;
3026 outputMap[var] = _mesa_find_free_register(usedTemps,
3027 MAX_TEMPS,
3028 firstTemp);
3029 outputTypes[var] = inst->src[j].type;
3030 firstTemp = outputMap[var] + 1;
3031 }
3032 inst->src[j].file = PROGRAM_TEMPORARY;
3033 inst->src[j].index = outputMap[var];
3034 }
3035 }
3036 }
3037
3038 if (numVaryingReads == 0)
3039 return; /* nothing to be done */
3040
3041 /* look for instructions which write to the varying vars identified above */
3042 foreach_iter(exec_list_iterator, iter, this->instructions) {
3043 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3044 if (inst->dst.file == type && outputMap[inst->dst.index] >= 0) {
3045 /* change inst to write to the temp reg, instead of the varying */
3046 inst->dst.file = PROGRAM_TEMPORARY;
3047 inst->dst.index = outputMap[inst->dst.index];
3048 }
3049 }
3050
3051 /* insert new MOV instructions at the end */
3052 for (i = 0; i < VERT_RESULT_MAX; i++) {
3053 if (outputMap[i] >= 0) {
3054 /* MOV VAR[i], TEMP[tmp]; */
3055 st_src_reg src = st_src_reg(PROGRAM_TEMPORARY, outputMap[i], outputTypes[i]);
3056 st_dst_reg dst = st_dst_reg(type, WRITEMASK_XYZW, outputTypes[i]);
3057 dst.index = i;
3058 this->emit(NULL, TGSI_OPCODE_MOV, dst, src);
3059 }
3060 }
3061 }
3062
3063 /**
3064 * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
3065 * are read from the given src in this instruction
3066 */
3067 static int
3068 get_src_arg_mask(st_dst_reg dst, st_src_reg src)
3069 {
3070 int read_mask = 0, comp;
3071
3072 /* Now, given the src swizzle and the written channels, find which
3073 * components are actually read
3074 */
3075 for (comp = 0; comp < 4; ++comp) {
3076 const unsigned coord = GET_SWZ(src.swizzle, comp);
3077 ASSERT(coord < 4);
3078 if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W)
3079 read_mask |= 1 << coord;
3080 }
3081
3082 return read_mask;
3083 }
3084
3085 /**
3086 * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
3087 * instruction is the first instruction to write to register T0. There are
3088 * several lowering passes done in GLSL IR (e.g. branches and
3089 * relative addressing) that create a large number of conditional assignments
3090 * that glsl_to_tgsi converts to CMP instructions like the one mentioned above.
3091 *
3092 * Here is why this conversion is safe:
3093 * CMP T0, T1 T2 T0 can be expanded to:
3094 * if (T1 < 0.0)
3095 * MOV T0, T2;
3096 * else
3097 * MOV T0, T0;
3098 *
3099 * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
3100 * as the original program. If (T1 < 0.0) evaluates to false, executing
3101 * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
3102 * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
3103 * because any instruction that was going to read from T0 after this was going
3104 * to read a garbage value anyway.
3105 */
3106 void
3107 glsl_to_tgsi_visitor::simplify_cmp(void)
3108 {
3109 unsigned tempWrites[MAX_TEMPS];
3110 unsigned outputWrites[MAX_PROGRAM_OUTPUTS];
3111
3112 memset(tempWrites, 0, sizeof(tempWrites));
3113 memset(outputWrites, 0, sizeof(outputWrites));
3114
3115 foreach_iter(exec_list_iterator, iter, this->instructions) {
3116 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3117 unsigned prevWriteMask = 0;
3118
3119 /* Give up if we encounter relative addressing or flow control. */
3120 if (inst->dst.reladdr ||
3121 tgsi_get_opcode_info(inst->op)->is_branch ||
3122 inst->op == TGSI_OPCODE_BGNSUB ||
3123 inst->op == TGSI_OPCODE_CONT ||
3124 inst->op == TGSI_OPCODE_END ||
3125 inst->op == TGSI_OPCODE_ENDSUB ||
3126 inst->op == TGSI_OPCODE_RET) {
3127 return;
3128 }
3129
3130 if (inst->dst.file == PROGRAM_OUTPUT) {
3131 assert(inst->dst.index < MAX_PROGRAM_OUTPUTS);
3132 prevWriteMask = outputWrites[inst->dst.index];
3133 outputWrites[inst->dst.index] |= inst->dst.writemask;
3134 } else if (inst->dst.file == PROGRAM_TEMPORARY) {
3135 assert(inst->dst.index < MAX_TEMPS);
3136 prevWriteMask = tempWrites[inst->dst.index];
3137 tempWrites[inst->dst.index] |= inst->dst.writemask;
3138 }
3139
3140 /* For a CMP to be considered a conditional write, the destination
3141 * register and source register two must be the same. */
3142 if (inst->op == TGSI_OPCODE_CMP
3143 && !(inst->dst.writemask & prevWriteMask)
3144 && inst->src[2].file == inst->dst.file
3145 && inst->src[2].index == inst->dst.index
3146 && inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) {
3147
3148 inst->op = TGSI_OPCODE_MOV;
3149 inst->src[0] = inst->src[1];
3150 }
3151 }
3152 }
3153
3154 /* Replaces all references to a temporary register index with another index. */
3155 void
3156 glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
3157 {
3158 foreach_iter(exec_list_iterator, iter, this->instructions) {
3159 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3160 unsigned j;
3161
3162 for (j=0; j < num_inst_src_regs(inst->op); j++) {
3163 if (inst->src[j].file == PROGRAM_TEMPORARY &&
3164 inst->src[j].index == index) {
3165 inst->src[j].index = new_index;
3166 }
3167 }
3168
3169 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
3170 inst->dst.index = new_index;
3171 }
3172 }
3173 }
3174
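/**
 * Returns the index of the first instruction that reads the given temporary
 * register, or -1 if it is never read.  If the first read happens inside a
 * loop, the index of the outermost enclosing BGNLOOP is returned instead.
 */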
3175 int
3176 glsl_to_tgsi_visitor::get_first_temp_read(int index)
3177 {
3178 int depth = 0; /* loop depth */
3179 int loop_start = -1; /* index of the first active BGNLOOP (if any) */
3180 unsigned i = 0, j;
3181
3182 foreach_iter(exec_list_iterator, iter, this->instructions) {
3183 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3184
3185 for (j=0; j < num_inst_src_regs(inst->op); j++) {
3186 if (inst->src[j].file == PROGRAM_TEMPORARY &&
3187 inst->src[j].index == index) {
3188 return (depth == 0) ? i : loop_start;
3189 }
3190 }
3191
3192 if (inst->op == TGSI_OPCODE_BGNLOOP) {
3193 if(depth++ == 0)
3194 loop_start = i;
3195 } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
3196 if (--depth == 0)
3197 loop_start = -1;
3198 }
3199 assert(depth >= 0);
3200
3201 i++;
3202 }
3203
3204 return -1;
3205 }
3206
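/**
 * Returns the index of the first instruction that writes the given temporary
 * register, or -1 if it is never written.  If the first write happens inside
 * a loop, the index of the outermost enclosing BGNLOOP is returned instead.
 */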
3207 int
3208 glsl_to_tgsi_visitor::get_first_temp_write(int index)
3209 {
3210 int depth = 0; /* loop depth */
3211 int loop_start = -1; /* index of the first active BGNLOOP (if any) */
3212 int i = 0;
3213
3214 foreach_iter(exec_list_iterator, iter, this->instructions) {
3215 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3216
3217 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
3218 return (depth == 0) ? i : loop_start;
3219 }
3220
3221 if (inst->op == TGSI_OPCODE_BGNLOOP) {
3222 if(depth++ == 0)
3223 loop_start = i;
3224 } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
3225 if (--depth == 0)
3226 loop_start = -1;
3227 }
3228 assert(depth >= 0);
3229
3230 i++;
3231 }
3232
3233 return -1;
3234 }
3235
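/**
 * Returns the index of the last instruction that reads the given temporary
 * register, or -1 if it is never read.  If the last read happens inside a
 * loop, the index of the ENDLOOP closing the outermost enclosing loop is
 * returned instead.
 */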
3236 int
3237 glsl_to_tgsi_visitor::get_last_temp_read(int index)
3238 {
3239 int depth = 0; /* loop depth */
3240 int last = -1; /* index of last instruction that reads the temporary */
3241 unsigned i = 0, j;
3242
3243 foreach_iter(exec_list_iterator, iter, this->instructions) {
3244 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3245
3246 for (j=0; j < num_inst_src_regs(inst->op); j++) {
3247 if (inst->src[j].file == PROGRAM_TEMPORARY &&
3248 inst->src[j].index == index) {
3249 last = (depth == 0) ? i : -2;
3250 }
3251 }
3252
3253 if (inst->op == TGSI_OPCODE_BGNLOOP)
3254 depth++;
3255 else if (inst->op == TGSI_OPCODE_ENDLOOP)
3256 if (--depth == 0 && last == -2)
3257 last = i;
3258 assert(depth >= 0);
3259
3260 i++;
3261 }
3262
3263 assert(last >= -1);
3264 return last;
3265 }
3266
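/**
 * Returns the index of the last instruction that writes the given temporary
 * register, or -1 if it is never written.  If the last write happens inside
 * a loop, the index of the ENDLOOP closing the outermost enclosing loop is
 * returned instead.
 */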
3267 int
3268 glsl_to_tgsi_visitor::get_last_temp_write(int index)
3269 {
3270 int depth = 0; /* loop depth */
3271 int last = -1; /* index of last instruction that writes to the temporary */
3272 int i = 0;
3273
3274 foreach_iter(exec_list_iterator, iter, this->instructions) {
3275 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3276
3277 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index)
3278 last = (depth == 0) ? i : -2;
3279
3280 if (inst->op == TGSI_OPCODE_BGNLOOP)
3281 depth++;
3282 else if (inst->op == TGSI_OPCODE_ENDLOOP)
3283 if (--depth == 0 && last == -2)
3284 last = i;
3285 assert(depth >= 0);
3286
3287 i++;
3288 }
3289
3290 assert(last >= -1);
3291 return last;
3292 }
3293
3294 /*
3295 * On a basic block basis, tracks available PROGRAM_TEMPORARY register
3296 * channels for copy propagation and updates following instructions to
3297 * use the original versions.
3298 *
3299 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
3300 * will occur. As an example, a TXP production before this pass:
3301 *
3302 * 0: MOV TEMP[1], INPUT[4].xyyy;
3303 * 1: MOV TEMP[1].w, INPUT[4].wwww;
3304 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
3305 *
3306 * and after:
3307 *
3308 * 0: MOV TEMP[1], INPUT[4].xyyy;
3309 * 1: MOV TEMP[1].w, INPUT[4].wwww;
3310 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
3311 *
3312 * which allows for dead code elimination on TEMP[1]'s writes.
3313 */
3314 void
3315 glsl_to_tgsi_visitor::copy_propagate(void)
3316 {
3317 glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
3318 glsl_to_tgsi_instruction *,
3319 this->next_temp * 4);
3320 int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
3321 int level = 0;
3322
3323 foreach_iter(exec_list_iterator, iter, this->instructions) {
3324 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3325
3326 assert(inst->dst.file != PROGRAM_TEMPORARY
3327 || inst->dst.index < this->next_temp);
3328
3329 /* First, do any copy propagation possible into the src regs. */
3330 for (int r = 0; r < 3; r++) {
3331 glsl_to_tgsi_instruction *first = NULL;
3332 bool good = true;
3333 int acp_base = inst->src[r].index * 4;
3334
3335 if (inst->src[r].file != PROGRAM_TEMPORARY ||
3336 inst->src[r].reladdr)
3337 continue;
3338
3339 /* See if we can find entries in the ACP consisting of MOVs
3340 * from the same src register for all the swizzled channels
3341 * of this src register reference.
3342 */
3343 for (int i = 0; i < 4; i++) {
3344 int src_chan = GET_SWZ(inst->src[r].swizzle, i);
3345 glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];
3346
3347 if (!copy_chan) {
3348 good = false;
3349 break;
3350 }
3351
3352 assert(acp_level[acp_base + src_chan] <= level);
3353
3354 if (!first) {
3355 first = copy_chan;
3356 } else {
3357 if (first->src[0].file != copy_chan->src[0].file ||
3358 first->src[0].index != copy_chan->src[0].index) {
3359 good = false;
3360 break;
3361 }
3362 }
3363 }
3364
3365 if (good) {
3366 /* We've now validated that we can copy-propagate to
3367 * replace this src register reference. Do it.
3368 */
3369 inst->src[r].file = first->src[0].file;
3370 inst->src[r].index = first->src[0].index;
3371
3372 int swizzle = 0;
3373 for (int i = 0; i < 4; i++) {
3374 int src_chan = GET_SWZ(inst->src[r].swizzle, i);
3375 glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan];
3376 swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
3377 (3 * i));
3378 }
3379 inst->src[r].swizzle = swizzle;
3380 }
3381 }
3382
3383 switch (inst->op) {
3384 case TGSI_OPCODE_BGNLOOP:
3385 case TGSI_OPCODE_ENDLOOP:
3386 /* End of a basic block, clear the ACP entirely. */
3387 memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
3388 break;
3389
3390 case TGSI_OPCODE_IF:
3391 ++level;
3392 break;
3393
3394 case TGSI_OPCODE_ENDIF:
3395 case TGSI_OPCODE_ELSE:
3396 /* Clear all channels written inside the block from the ACP, but
3397 * leave those that were not touched.
3398 */
3399 for (int r = 0; r < this->next_temp; r++) {
3400 for (int c = 0; c < 4; c++) {
3401 if (!acp[4 * r + c])
3402 continue;
3403
3404 if (acp_level[4 * r + c] >= level)
3405 acp[4 * r + c] = NULL;
3406 }
3407 }
3408 if (inst->op == TGSI_OPCODE_ENDIF)
3409 --level;
3410 break;
3411
3412 default:
3413 /* Continuing the block, clear any written channels from
3414 * the ACP.
3415 */
3416 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
3417 /* Any temporary might be written, so no copy propagation
3418 * across this instruction.
3419 */
3420 memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
3421 } else if (inst->dst.file == PROGRAM_OUTPUT &&
3422 inst->dst.reladdr) {
3423 /* Any output might be written, so no copy propagation
3424 * from outputs across this instruction.
3425 */
3426 for (int r = 0; r < this->next_temp; r++) {
3427 for (int c = 0; c < 4; c++) {
3428 if (!acp[4 * r + c])
3429 continue;
3430
3431 if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
3432 acp[4 * r + c] = NULL;
3433 }
3434 }
3435 } else if (inst->dst.file == PROGRAM_TEMPORARY ||
3436 inst->dst.file == PROGRAM_OUTPUT) {
3437 /* Clear where it's used as dst. */
3438 if (inst->dst.file == PROGRAM_TEMPORARY) {
3439 for (int c = 0; c < 4; c++) {
3440 if (inst->dst.writemask & (1 << c)) {
3441 acp[4 * inst->dst.index + c] = NULL;
3442 }
3443 }
3444 }
3445
3446 /* Clear where it's used as src. */
3447 for (int r = 0; r < this->next_temp; r++) {
3448 for (int c = 0; c < 4; c++) {
3449 if (!acp[4 * r + c])
3450 continue;
3451
3452 int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);
3453
3454 if (acp[4 * r + c]->src[0].file == inst->dst.file &&
3455 acp[4 * r + c]->src[0].index == inst->dst.index &&
3456 inst->dst.writemask & (1 << src_chan))
3457 {
3458 acp[4 * r + c] = NULL;
3459 }
3460 }
3461 }
3462 }
3463 break;
3464 }
3465
3466 /* If this is a copy, add it to the ACP. */
3467 if (inst->op == TGSI_OPCODE_MOV &&
3468 inst->dst.file == PROGRAM_TEMPORARY &&
3469 !inst->dst.reladdr &&
3470 !inst->saturate &&
3471 !inst->src[0].reladdr &&
3472 !inst->src[0].negate) {
3473 for (int i = 0; i < 4; i++) {
3474 if (inst->dst.writemask & (1 << i)) {
3475 acp[4 * inst->dst.index + i] = inst;
3476 acp_level[4 * inst->dst.index + i] = level;
3477 }
3478 }
3479 }
3480 }
3481
3482 ralloc_free(acp_level);
3483 ralloc_free(acp);
3484 }
3485
3486 /*
3487 * Tracks available PROGRAM_TEMPORARY registers for dead code elimination.
3488 *
3489 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
3490 * will occur. As an example, a TXP production after copy propagation but
3491 * before this pass:
3492 *
3493 * 0: MOV TEMP[1], INPUT[4].xyyy;
3494 * 1: MOV TEMP[1].w, INPUT[4].wwww;
3495 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
3496 *
3497 * and after this pass:
3498 *
3499 * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
3500 *
3501 * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB)
3502 * FIXME: doesn't eliminate all dead code inside of loops; it steps around them
3503 */
3504 void
3505 glsl_to_tgsi_visitor::eliminate_dead_code(void)
3506 {
3507 int i;
3508
3509 for (i=0; i < this->next_temp; i++) {
3510 int last_read = get_last_temp_read(i);
3511 int j = 0;
3512
3513 foreach_iter(exec_list_iterator, iter, this->instructions) {
3514 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3515
3516 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i &&
3517 j > last_read)
3518 {
3519 iter.remove();
3520 delete inst;
3521 }
3522
3523 j++;
3524 }
3525 }
3526 }
3527
3528 /*
3529 * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead
3530 * code elimination. This is less primitive than eliminate_dead_code(), as it
3531 * is per-channel and can detect consecutive writes without a read between them
3532 * as dead code. However, there is some dead code that can be eliminated by
3533 * eliminate_dead_code() but not this function - for example, this function
3534 * cannot eliminate an instruction writing to a register that is never read and
3535 * is the only instruction writing to that register.
3536 *
3537 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
3538 * will occur.
3539 */
3540 int
3541 glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
3542 {
3543 glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
3544 glsl_to_tgsi_instruction *,
3545 this->next_temp * 4);
3546 int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
3547 int level = 0;
3548 int removed = 0;
3549
3550 foreach_iter(exec_list_iterator, iter, this->instructions) {
3551 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3552
3553 assert(inst->dst.file != PROGRAM_TEMPORARY
3554 || inst->dst.index < this->next_temp);
3555
3556 switch (inst->op) {
3557 case TGSI_OPCODE_BGNLOOP:
3558 case TGSI_OPCODE_ENDLOOP:
3559 /* End of a basic block, clear the write array entirely.
3560 * FIXME: This keeps us from killing dead code when the writes are
3561 * on either side of a loop, even when the register isn't touched
3562 * inside the loop.
3563 */
3564 memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
3565 break;
3566
3567 case TGSI_OPCODE_ENDIF:
3568 --level;
3569 break;
3570
3571 case TGSI_OPCODE_ELSE:
3572 /* Clear all channels written inside the preceding if block from the
3573 * write array, but leave those that were not touched.
3574 *
3575 * FIXME: This destroys opportunities to remove dead code inside of
3576 * IF blocks that are followed by an ELSE block.
3577 */
3578 for (int r = 0; r < this->next_temp; r++) {
3579 for (int c = 0; c < 4; c++) {
3580 if (!writes[4 * r + c])
3581 continue;
3582
3583 if (write_level[4 * r + c] >= level)
3584 writes[4 * r + c] = NULL;
3585 }
3586 }
3587 break;
3588
3589 case TGSI_OPCODE_IF:
3590 ++level;
3591 /* fallthrough to default case to mark the condition as read */
3592
3593 default:
3594 /* Continuing the block, clear any channels from the write array that
3595 * are read by this instruction.
3596 */
3597 for (unsigned i = 0; i < Elements(inst->src); i++) {
3598 if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){
3599 /* Any temporary might be read, so no dead code elimination
3600 * across this instruction.
3601 */
3602 memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
3603 } else if (inst->src[i].file == PROGRAM_TEMPORARY) {
3604 /* Clear where it's used as src. */
3605 int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0);
3606 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1);
3607 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2);
3608 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3);
3609
3610 for (int c = 0; c < 4; c++) {
3611 if (src_chans & (1 << c)) {
3612 writes[4 * inst->src[i].index + c] = NULL;
3613 }
3614 }
3615 }
3616 }
3617 break;
3618 }
3619
3620 /* If this instruction writes to a temporary, add it to the write array.
3621 * If there is already an instruction in the write array for one or more
3622 * of the channels, flag that channel write as dead.
3623 */
3624 if (inst->dst.file == PROGRAM_TEMPORARY &&
3625 !inst->dst.reladdr &&
3626 !inst->saturate) {
3627 for (int c = 0; c < 4; c++) {
3628 if (inst->dst.writemask & (1 << c)) {
3629 if (writes[4 * inst->dst.index + c]) {
3630 if (write_level[4 * inst->dst.index + c] < level)
3631 continue;
3632 else
3633 writes[4 * inst->dst.index + c]->dead_mask |= (1 << c);
3634 }
3635 writes[4 * inst->dst.index + c] = inst;
3636 write_level[4 * inst->dst.index + c] = level;
3637 }
3638 }
3639 }
3640 }
3641
3642 /* Anything still in the write array at this point is dead code. */
3643 for (int r = 0; r < this->next_temp; r++) {
3644 for (int c = 0; c < 4; c++) {
3645 glsl_to_tgsi_instruction *inst = writes[4 * r + c];
3646 if (inst)
3647 inst->dead_mask |= (1 << c);
3648 }
3649 }
3650
3651 /* Now actually remove the instructions that are completely dead and update
3652 * the writemask of other instructions with dead channels.
3653 */
3654 foreach_iter(exec_list_iterator, iter, this->instructions) {
3655 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3656
3657 if (!inst->dead_mask || !inst->dst.writemask)
3658 continue;
3659 else if (inst->dead_mask == inst->dst.writemask) {
3660 iter.remove();
3661 delete inst;
3662 removed++;
3663 } else
3664 inst->dst.writemask &= ~(inst->dead_mask);
3665 }
3666
3667 ralloc_free(write_level);
3668 ralloc_free(writes);
3669
3670 return removed;
3671 }
3672
3673 /* Merges temporary registers together where possible to reduce the number of
3674 * registers needed to run a program.
3675 *
3676 * Produces optimal code only after copy propagation and dead code elimination
3677 * have been run. */
3678 void
3679 glsl_to_tgsi_visitor::merge_registers(void)
3680 {
3681 int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp);
3682 int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp);
3683 int i, j;
3684
3685 /* Read the indices of the last read and first write to each temp register
3686 * into an array so that we don't have to traverse the instruction list as
3687 * much. */
3688 for (i=0; i < this->next_temp; i++) {
3689 last_reads[i] = get_last_temp_read(i);
3690 first_writes[i] = get_first_temp_write(i);
3691 }
3692
3693 /* Start looking for registers with non-overlapping usages that can be
3694 * merged together. */
3695 for (i=0; i < this->next_temp; i++) {
3696 /* Don't touch unused registers. */
3697 if (last_reads[i] < 0 || first_writes[i] < 0) continue;
3698
3699 for (j=0; j < this->next_temp; j++) {
3700 /* Don't touch unused registers. */
3701 if (last_reads[j] < 0 || first_writes[j] < 0) continue;
3702
3703 /* We can merge the two registers if the first write to j is after or
3704 * in the same instruction as the last read from i. Note that the
3705 * register at index i will always be used earlier or at the same time
3706 * as the register at index j. */
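            /* For example, if temp i is written at instruction 2 and last read
             * at instruction 10, while temp j is first written at instruction
             * 12, the two registers can share a single index.
             */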
3707 if (first_writes[i] <= first_writes[j] &&
3708 last_reads[i] <= first_writes[j])
3709 {
3710 rename_temp_register(j, i); /* Replace all references to j with i.*/
3711
3712 /* Update the first_writes and last_reads arrays with the new
3713 * values for the merged register index, and mark the newly unused
3714 * register index as such. */
3715 last_reads[i] = last_reads[j];
3716 first_writes[j] = -1;
3717 last_reads[j] = -1;
3718 }
3719 }
3720 }
3721
3722 ralloc_free(last_reads);
3723 ralloc_free(first_writes);
3724 }
3725
3726 /* Reassign indices to temporary registers by reusing unused indices created
3727 * by optimization passes. */
3728 void
3729 glsl_to_tgsi_visitor::renumber_registers(void)
3730 {
3731 int i = 0;
3732 int new_index = 0;
3733
3734 for (i=0; i < this->next_temp; i++) {
3735 if (get_first_temp_read(i) < 0) continue;
3736 if (i != new_index)
3737 rename_temp_register(i, new_index);
3738 new_index++;
3739 }
3740
3741 this->next_temp = new_index;
3742 }
3743
3744 /**
3745  * Fill in a fragment program which implements the current pixel transfer ops.
3746 * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c.
3747 */
3748 extern "C" void
3749 get_pixel_transfer_visitor(struct st_fragment_program *fp,
3750 glsl_to_tgsi_visitor *original,
3751 int scale_and_bias, int pixel_maps)
3752 {
3753 glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
3754 struct st_context *st = st_context(original->ctx);
3755 struct gl_program *prog = &fp->Base.Base;
3756 struct gl_program_parameter_list *params = _mesa_new_parameter_list();
3757 st_src_reg coord, src0;
3758 st_dst_reg dst0;
3759 glsl_to_tgsi_instruction *inst;
3760
3761    /* Copy attributes from the original shader's glsl_to_tgsi_visitor. */
3762 v->ctx = original->ctx;
3763 v->prog = prog;
3764 v->glsl_version = original->glsl_version;
3765 v->native_integers = original->native_integers;
3766 v->options = original->options;
3767 v->next_temp = original->next_temp;
3768 v->num_address_regs = original->num_address_regs;
3769 v->samplers_used = prog->SamplersUsed = original->samplers_used;
3770 v->indirect_addr_temps = original->indirect_addr_temps;
3771 v->indirect_addr_consts = original->indirect_addr_consts;
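   /* The copied instructions reference immediates by index, so share the
    * original visitor's immediate list with the new visitor.
    */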
3772 memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
3773
3774 /*
3775 * Get initial pixel color from the texture.
3776 * TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
3777 */
3778 coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
3779 src0 = v->get_temp(glsl_type::vec4_type);
3780 dst0 = st_dst_reg(src0);
3781 inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
3782 inst->sampler = 0;
3783 inst->tex_target = TEXTURE_2D_INDEX;
3784
3785 prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0);
3786 prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */
3787 v->samplers_used |= (1 << 0);
3788
3789 if (scale_and_bias) {
3790 static const gl_state_index scale_state[STATE_LENGTH] =
3791 { STATE_INTERNAL, STATE_PT_SCALE,
3792 (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
3793 static const gl_state_index bias_state[STATE_LENGTH] =
3794 { STATE_INTERNAL, STATE_PT_BIAS,
3795 (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
3796 GLint scale_p, bias_p;
3797 st_src_reg scale, bias;
3798
3799 scale_p = _mesa_add_state_reference(params, scale_state);
3800 bias_p = _mesa_add_state_reference(params, bias_state);
3801
3802 /* MAD colorTemp, colorTemp, scale, bias; */
3803 scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT);
3804 bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT);
3805 inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias);
3806 }
3807
3808 if (pixel_maps) {
3809 st_src_reg temp = v->get_temp(glsl_type::vec4_type);
3810 st_dst_reg temp_dst = st_dst_reg(temp);
3811
3812 assert(st->pixel_xfer.pixelmap_texture);
3813
3814 /* With a little effort, we can do four pixel map look-ups with
3815 * two TEX instructions:
3816 */
3817
3818 /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */
3819 temp_dst.writemask = WRITEMASK_XY; /* write R,G */
3820 inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
3821 inst->sampler = 1;
3822 inst->tex_target = TEXTURE_2D_INDEX;
3823
3824 /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */
3825 src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
3826 temp_dst.writemask = WRITEMASK_ZW; /* write B,A */
3827 inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
3828 inst->sampler = 1;
3829 inst->tex_target = TEXTURE_2D_INDEX;
3830
3831 prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */
3832 v->samplers_used |= (1 << 1);
3833
3834 /* MOV colorTemp, temp; */
3835 inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp);
3836 }
3837
3838 /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
3839 * new visitor. */
3840 foreach_iter(exec_list_iterator, iter, original->instructions) {
3841 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3842 st_src_reg src_regs[3];
3843
3844 if (inst->dst.file == PROGRAM_OUTPUT)
3845 prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);
3846
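      /* Redirect reads of gl_Color (FRAG_ATTRIB_COL0) in the original program
       * to the temporary that now holds the pixel-transfer result.
       */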
3847 for (int i=0; i<3; i++) {
3848 src_regs[i] = inst->src[i];
3849 if (src_regs[i].file == PROGRAM_INPUT &&
3850 src_regs[i].index == FRAG_ATTRIB_COL0)
3851 {
3852 src_regs[i].file = PROGRAM_TEMPORARY;
3853 src_regs[i].index = src0.index;
3854 }
3855 else if (src_regs[i].file == PROGRAM_INPUT)
3856 prog->InputsRead |= (1 << src_regs[i].index);
3857 }
3858
3859 v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
3860 }
3861
3862 /* Make modifications to fragment program info. */
3863 prog->Parameters = _mesa_combine_parameter_lists(params,
3864 original->prog->Parameters);
3865 prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes);
3866 prog->Varying = _mesa_clone_parameter_list(original->prog->Varying);
3867 _mesa_free_parameter_list(params);
3868 count_resources(v, prog);
3869 fp->glsl_to_tgsi = v;
3870 }
3871
3872 /**
3873 * Make fragment program for glBitmap:
3874 * Sample the texture and kill the fragment if the bit is 0.
3875 * This program will be combined with the user's fragment program.
3876 *
3877 * Based on make_bitmap_fragment_program in st_cb_bitmap.c.
3878 */
3879 extern "C" void
3880 get_bitmap_visitor(struct st_fragment_program *fp,
3881 glsl_to_tgsi_visitor *original, int samplerIndex)
3882 {
3883 glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
3884 struct st_context *st = st_context(original->ctx);
3885 struct gl_program *prog = &fp->Base.Base;
3886 st_src_reg coord, src0;
3887 st_dst_reg dst0;
3888 glsl_to_tgsi_instruction *inst;
3889
3890    /* Copy attributes from the original shader's glsl_to_tgsi_visitor. */
3891 v->ctx = original->ctx;
3892 v->prog = prog;
3893 v->glsl_version = original->glsl_version;
3894 v->native_integers = original->native_integers;
3895 v->options = original->options;
3896 v->next_temp = original->next_temp;
3897 v->num_address_regs = original->num_address_regs;
3898 v->samplers_used = prog->SamplersUsed = original->samplers_used;
3899 v->indirect_addr_temps = original->indirect_addr_temps;
3900 v->indirect_addr_consts = original->indirect_addr_consts;
3901 memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
3902
3903 /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
3904 coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type);
3905 src0 = v->get_temp(glsl_type::vec4_type);
3906 dst0 = st_dst_reg(src0);
3907 inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
3908 inst->sampler = samplerIndex;
3909 inst->tex_target = TEXTURE_2D_INDEX;
3910
3911 prog->InputsRead |= (1 << FRAG_ATTRIB_TEX0);
3912 prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */
3913 v->samplers_used |= (1 << samplerIndex);
3914
3915    /* KIL if -tmp0 < 0  # texel=0 -> keep / texel!=0 -> discard */
3916 src0.negate = NEGATE_XYZW;
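   /* For L8 bitmap textures the texel value lives in the X (luminance)
    * channel, so replicate it across all components.
    */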
3917 if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
3918 src0.swizzle = SWIZZLE_XXXX;
3919 inst = v->emit(NULL, TGSI_OPCODE_KIL, undef_dst, src0);
3920
3921 /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
3922 * new visitor. */
3923 foreach_iter(exec_list_iterator, iter, original->instructions) {
3924 glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
3925 st_src_reg src_regs[3];
3926
3927 if (inst->dst.file == PROGRAM_OUTPUT)
3928 prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);
3929
3930 for (int i=0; i<3; i++) {
3931 src_regs[i] = inst->src[i];
3932 if (src_regs[i].file == PROGRAM_INPUT)
3933 prog->InputsRead |= (1 << src_regs[i].index);
3934 }
3935
3936 v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
3937 }
3938
3939 /* Make modifications to fragment program info. */
3940 prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters);
3941 prog->Attributes = _mesa_clone_parameter_list(original->prog->Attributes);
3942 prog->Varying = _mesa_clone_parameter_list(original->prog->Varying);
3943 count_resources(v, prog);
3944 fp->glsl_to_tgsi = v;
3945 }
3946
3947 /* ------------------------- TGSI conversion stuff -------------------------- */
3948 struct label {
3949 unsigned branch_target;
3950 unsigned token;
3951 };
3952
3953 /**
3954 * Intermediate state used during shader translation.
3955 */
3956 struct st_translate {
3957 struct ureg_program *ureg;
3958
3959 struct ureg_dst temps[MAX_TEMPS];
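   /* constants[] and immediates[] are allocated during translation and are
    * indexed by gl_program parameter index and immediate number, respectively.
    */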
3960 struct ureg_src *constants;
3961 struct ureg_src *immediates;
3962 struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
3963 struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
3964 struct ureg_dst address[1];
3965 struct ureg_src samplers[PIPE_MAX_SAMPLERS];
3966 struct ureg_src systemValues[SYSTEM_VALUE_MAX];
3967
3968 /* Extra info for handling point size clamping in vertex shader */
3969 struct ureg_dst pointSizeResult; /**< Actual point size output register */
3970 struct ureg_src pointSizeConst; /**< Point size range constant register */
3971 GLint pointSizeOutIndex; /**< Temp point size output register */
3972 GLboolean prevInstWrotePointSize;
3973
3974 const GLuint *inputMapping;
3975 const GLuint *outputMapping;
3976
3977 /* For every instruction that contains a label (eg CALL), keep
3978 * details so that we can go back afterwards and emit the correct
3979 * tgsi instruction number for each label.
3980 */
3981 struct label *labels;
3982 unsigned labels_size;
3983 unsigned labels_count;
3984
3985 /* Keep a record of the tgsi instruction number that each mesa
3986 * instruction starts at, will be used to fix up labels after
3987 * translation.
3988 */
3989 unsigned *insn;
3990 unsigned insn_size;
3991 unsigned insn_count;
3992
3993 unsigned procType; /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */
3994
3995 boolean error;
3996 };
3997
3998 /** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */
3999 static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
4000 TGSI_SEMANTIC_FACE,
4001 TGSI_SEMANTIC_INSTANCEID
4002 };
4003
4004 /**
4005 * Make note of a branch to a label in the TGSI code.
4006 * After we've emitted all instructions, we'll go over the list
4007 * of labels built here and patch the TGSI code with the actual
4008 * location of each label.
4009 */
4010 static unsigned *get_label(struct st_translate *t, unsigned branch_target)
4011 {
4012 unsigned i;
4013
4014 if (t->labels_count + 1 >= t->labels_size) {
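      /* Grow the labels array to the next power-of-two size. */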
4015 t->labels_size = 1 << (util_logbase2(t->labels_size) + 1);
4016 t->labels = (struct label *)realloc(t->labels,
4017 t->labels_size * sizeof(struct label));
4018 if (t->labels == NULL) {
4019 static unsigned dummy;
4020 t->error = TRUE;
4021 return &dummy;
4022 }
4023 }
4024
4025 i = t->labels_count++;
4026 t->labels[i].branch_target = branch_target;
4027 return &t->labels[i].token;
4028 }
4029
4030 /**
4031 * Called prior to emitting the TGSI code for each instruction.
4032 * Allocate additional space for instructions if needed.
4033 * Update the insn[] array so the next glsl_to_tgsi_instruction points to
4034 * the next TGSI instruction.
4035 */
4036 static void set_insn_start(struct st_translate *t, unsigned start)
4037 {
4038 if (t->insn_count + 1 >= t->insn_size) {
4039 t->insn_size = 1 << (util_logbase2(t->insn_size) + 1);
4040 t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0]));
4041 if (t->insn == NULL) {
4042 t->error = TRUE;
4043 return;
4044 }
4045 }
4046
4047 t->insn[t->insn_count++] = start;
4048 }
4049
4050 /**
4051 * Map a glsl_to_tgsi constant/immediate to a TGSI immediate.
4052 */
4053 static struct ureg_src
4054 emit_immediate(struct st_translate *t,
4055 gl_constant_value values[4],
4056 int type, int size)
4057 {
4058 struct ureg_program *ureg = t->ureg;
4059
4060 switch(type)
4061 {
4062 case GL_FLOAT:
4063 return ureg_DECL_immediate(ureg, &values[0].f, size);
4064 case GL_INT:
4065 return ureg_DECL_immediate_int(ureg, &values[0].i, size);
4066 case GL_UNSIGNED_INT:
4067 case GL_BOOL:
4068 return ureg_DECL_immediate_uint(ureg, &values[0].u, size);
4069 default:
4070 assert(!"should not get here - type must be float, int, uint, or bool");
4071 return ureg_src_undef();
4072 }
4073 }
4074
4075 /**
4076 * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register.
4077 */
4078 static struct ureg_dst
4079 dst_register(struct st_translate *t,
4080 gl_register_file file,
4081 GLuint index)
4082 {
4083 switch(file) {
4084 case PROGRAM_UNDEFINED:
4085 return ureg_dst_undef();
4086
4087 case PROGRAM_TEMPORARY:
4088 if (ureg_dst_is_undef(t->temps[index]))
4089 t->temps[index] = ureg_DECL_temporary(t->ureg);
4090
4091 return t->temps[index];
4092
4093 case PROGRAM_OUTPUT:
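      /* Keep track of writes to the point size output so that
       * st_translate_program() can clamp the value afterwards.
       */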
4094 if (t->procType == TGSI_PROCESSOR_VERTEX && index == VERT_RESULT_PSIZ)
4095 t->prevInstWrotePointSize = GL_TRUE;
4096
4097 if (t->procType == TGSI_PROCESSOR_VERTEX)
4098 assert(index < VERT_RESULT_MAX);
4099 else if (t->procType == TGSI_PROCESSOR_FRAGMENT)
4100 assert(index < FRAG_RESULT_MAX);
4101 else
4102 assert(index < GEOM_RESULT_MAX);
4103
4104 assert(t->outputMapping[index] < Elements(t->outputs));
4105
4106 return t->outputs[t->outputMapping[index]];
4107
4108 case PROGRAM_ADDRESS:
4109 return t->address[index];
4110
4111 default:
4112 assert(!"unknown dst register file");
4113 return ureg_dst_undef();
4114 }
4115 }
4116
4117 /**
4118 * Map a glsl_to_tgsi src register to a TGSI ureg_src register.
4119 */
4120 static struct ureg_src
4121 src_register(struct st_translate *t,
4122 gl_register_file file,
4123 GLuint index)
4124 {
4125 switch(file) {
4126 case PROGRAM_UNDEFINED:
4127 return ureg_src_undef();
4128
4129 case PROGRAM_TEMPORARY:
4130 assert(index >= 0);
4131 assert(index < Elements(t->temps));
4132 if (ureg_dst_is_undef(t->temps[index]))
4133 t->temps[index] = ureg_DECL_temporary(t->ureg);
4134 return ureg_src(t->temps[index]);
4135
4136 case PROGRAM_NAMED_PARAM:
4137 case PROGRAM_ENV_PARAM:
4138 case PROGRAM_LOCAL_PARAM:
4139 case PROGRAM_UNIFORM:
4140 assert(index >= 0);
4141 return t->constants[index];
4142 case PROGRAM_STATE_VAR:
4143 case PROGRAM_CONSTANT: /* ie, immediate */
4144       if ((int) index < 0)
4145 return ureg_DECL_constant(t->ureg, 0);
4146 else
4147 return t->constants[index];
4148
4149 case PROGRAM_IMMEDIATE:
4150 return t->immediates[index];
4151
4152 case PROGRAM_INPUT:
4153 assert(t->inputMapping[index] < Elements(t->inputs));
4154 return t->inputs[t->inputMapping[index]];
4155
4156 case PROGRAM_OUTPUT:
4157 assert(t->outputMapping[index] < Elements(t->outputs));
4158 return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */
4159
4160 case PROGRAM_ADDRESS:
4161 return ureg_src(t->address[index]);
4162
4163 case PROGRAM_SYSTEM_VALUE:
4164 assert(index < Elements(t->systemValues));
4165 return t->systemValues[index];
4166
4167 default:
4168 assert(!"unknown src register file");
4169 return ureg_src_undef();
4170 }
4171 }
4172
4173 /**
4174 * Create a TGSI ureg_dst register from an st_dst_reg.
4175 */
4176 static struct ureg_dst
4177 translate_dst(struct st_translate *t,
4178 const st_dst_reg *dst_reg,
4179 bool saturate)
4180 {
4181 struct ureg_dst dst = dst_register(t,
4182 dst_reg->file,
4183 dst_reg->index);
4184
4185 dst = ureg_writemask(dst, dst_reg->writemask);
4186
4187 if (saturate)
4188 dst = ureg_saturate(dst);
4189
4190 if (dst_reg->reladdr != NULL)
4191 dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
4192
4193 return dst;
4194 }
4195
4196 /**
4197 * Create a TGSI ureg_src register from an st_src_reg.
4198 */
4199 static struct ureg_src
4200 translate_src(struct st_translate *t, const st_src_reg *src_reg)
4201 {
4202 struct ureg_src src = src_register(t, src_reg->file, src_reg->index);
4203
4204 src = ureg_swizzle(src,
4205 GET_SWZ(src_reg->swizzle, 0) & 0x3,
4206 GET_SWZ(src_reg->swizzle, 1) & 0x3,
4207 GET_SWZ(src_reg->swizzle, 2) & 0x3,
4208 GET_SWZ(src_reg->swizzle, 3) & 0x3);
4209
4210 if ((src_reg->negate & 0xf) == NEGATE_XYZW)
4211 src = ureg_negate(src);
4212
4213 if (src_reg->reladdr != NULL) {
4214 /* Normally ureg_src_indirect() would be used here, but a stupid compiler
4215 * bug in g++ makes ureg_src_indirect (an inline C function) erroneously
4216 * set the bit for src.Negate. So we have to do the operation manually
4217 * here to work around the compiler's problems. */
4218 /*src = ureg_src_indirect(src, ureg_src(t->address[0]));*/
4219 struct ureg_src addr = ureg_src(t->address[0]);
4220 src.Indirect = 1;
4221 src.IndirectFile = addr.File;
4222 src.IndirectIndex = addr.Index;
4223 src.IndirectSwizzle = addr.SwizzleX;
4224
4225 if (src_reg->file != PROGRAM_INPUT &&
4226 src_reg->file != PROGRAM_OUTPUT) {
4227 /* If src_reg->index was negative, it was set to zero in
4228 * src_register(). Reassign it now. But don't do this
4229 * for input/output regs since they get remapped while
4230 * const buffers don't.
4231 */
4232 src.Index = src_reg->index;
4233 }
4234 }
4235
4236 return src;
4237 }
4238
4239 static struct tgsi_texture_offset
4240 translate_tex_offset(struct st_translate *t,
4241 const struct tgsi_texture_offset *in_offset)
4242 {
4243 struct tgsi_texture_offset offset;
4244
4245 assert(in_offset->File == PROGRAM_IMMEDIATE);
4246
4247 offset.File = TGSI_FILE_IMMEDIATE;
4248 offset.Index = in_offset->Index;
4249 offset.SwizzleX = in_offset->SwizzleX;
4250 offset.SwizzleY = in_offset->SwizzleY;
4251 offset.SwizzleZ = in_offset->SwizzleZ;
4252
4253 return offset;
4254 }
4255
4256 static void
4257 compile_tgsi_instruction(struct st_translate *t,
4258 const glsl_to_tgsi_instruction *inst)
4259 {
4260 struct ureg_program *ureg = t->ureg;
4261 GLuint i;
4262 struct ureg_dst dst[1];
4263 struct ureg_src src[4];
4264 struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET];
4265
4266 unsigned num_dst;
4267 unsigned num_src;
4268
4269 num_dst = num_inst_dst_regs(inst->op);
4270 num_src = num_inst_src_regs(inst->op);
4271
4272 if (num_dst)
4273 dst[0] = translate_dst(t,
4274 &inst->dst,
4275 inst->saturate);
4276
4277 for (i = 0; i < num_src; i++)
4278 src[i] = translate_src(t, &inst->src[i]);
4279
4280 switch(inst->op) {
4281 case TGSI_OPCODE_BGNLOOP:
4282 case TGSI_OPCODE_CAL:
4283 case TGSI_OPCODE_ELSE:
4284 case TGSI_OPCODE_ENDLOOP:
4285 case TGSI_OPCODE_IF:
4286 assert(num_dst == 0);
4287 ureg_label_insn(ureg,
4288 inst->op,
4289 src, num_src,
4290 get_label(t,
4291 inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0));
4292 return;
4293
4294 case TGSI_OPCODE_TEX:
4295 case TGSI_OPCODE_TXB:
4296 case TGSI_OPCODE_TXD:
4297 case TGSI_OPCODE_TXL:
4298 case TGSI_OPCODE_TXP:
4299 case TGSI_OPCODE_TXQ:
4300 case TGSI_OPCODE_TXF:
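      /* The sampler is appended as an extra source operand; texel offsets are
       * translated and passed separately to ureg_tex_insn().
       */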
4301 src[num_src++] = t->samplers[inst->sampler];
4302 for (i = 0; i < inst->tex_offset_num_offset; i++) {
4303 texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]);
4304 }
4305 ureg_tex_insn(ureg,
4306 inst->op,
4307 dst, num_dst,
4308 translate_texture_target(inst->tex_target, inst->tex_shadow),
4309 texoffsets, inst->tex_offset_num_offset,
4310 src, num_src);
4311 return;
4312
4313 case TGSI_OPCODE_SCS:
4314 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY);
4315 ureg_insn(ureg, inst->op, dst, num_dst, src, num_src);
4316 break;
4317
4318 default:
4319 ureg_insn(ureg,
4320 inst->op,
4321 dst, num_dst,
4322 src, num_src);
4323 break;
4324 }
4325 }
4326
4327 /**
4328 * Emit the TGSI instructions to adjust the WPOS pixel center convention
4329 * Basically, add (adjX, adjY) to the fragment position.
4330 */
4331 static void
4332 emit_adjusted_wpos(struct st_translate *t,
4333 const struct gl_program *program,
4334 float adjX, float adjY)
4335 {
4336 struct ureg_program *ureg = t->ureg;
4337 struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg);
4338 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
4339
4340 /* Note that we bias X and Y and pass Z and W through unchanged.
4341 * The shader might also use gl_FragCoord.w and .z.
4342 */
4343 ureg_ADD(ureg, wpos_temp, wpos_input,
4344 ureg_imm4f(ureg, adjX, adjY, 0.0f, 0.0f));
4345
4346 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
4347 }
4348
4349
4350 /**
4351 * Emit the TGSI instructions for inverting the WPOS y coordinate.
4352  * This code is emitted unconditionally because the final Y transform also
4353  * depends on whether an FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
4354 */
4355 static void
4356 emit_wpos_inversion(struct st_translate *t,
4357 const struct gl_program *program,
4358 bool invert)
4359 {
4360 struct ureg_program *ureg = t->ureg;
4361
4362 /* Fragment program uses fragment position input.
4363 * Need to replace instances of INPUT[WPOS] with temp T
4364    * where T = INPUT[WPOS] with the Y coordinate inverted.
4365 */
4366 static const gl_state_index wposTransformState[STATE_LENGTH]
4367 = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM,
4368 (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 };
4369
4370 /* XXX: note we are modifying the incoming shader here! Need to
4371 * do this before emitting the constant decls below, or this
4372 * will be missed:
4373 */
4374 unsigned wposTransConst = _mesa_add_state_reference(program->Parameters,
4375 wposTransformState);
4376
4377 struct ureg_src wpostrans = ureg_DECL_constant(ureg, wposTransConst);
4378 struct ureg_dst wpos_temp;
4379 struct ureg_src wpos_input = t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]];
4380
4381 /* MOV wpos_temp, input[wpos]
4382 */
4383 if (wpos_input.File == TGSI_FILE_TEMPORARY)
4384 wpos_temp = ureg_dst(wpos_input);
4385 else {
4386 wpos_temp = ureg_DECL_temporary(ureg);
4387 ureg_MOV(ureg, wpos_temp, wpos_input);
4388 }
4389
4390 if (invert) {
4391 /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
4392 */
4393 ureg_MAD(ureg,
4394 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
4395 wpos_input,
4396 ureg_scalar(wpostrans, 0),
4397 ureg_scalar(wpostrans, 1));
4398 } else {
4399 /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
4400 */
4401 ureg_MAD(ureg,
4402 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y),
4403 wpos_input,
4404 ureg_scalar(wpostrans, 2),
4405 ureg_scalar(wpostrans, 3));
4406 }
4407
4408 /* Use wpos_temp as position input from here on:
4409 */
4410 t->inputs[t->inputMapping[FRAG_ATTRIB_WPOS]] = ureg_src(wpos_temp);
4411 }
4412
4413
4414 /**
4415  * Emit fragment position/coordinate code.
4416 */
4417 static void
4418 emit_wpos(struct st_context *st,
4419 struct st_translate *t,
4420 const struct gl_program *program,
4421 struct ureg_program *ureg)
4422 {
4423 const struct gl_fragment_program *fp =
4424 (const struct gl_fragment_program *) program;
4425 struct pipe_screen *pscreen = st->pipe->screen;
4426 boolean invert = FALSE;
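   /* Decide whether the Y coordinate must be inverted by comparing the origin
    * and pixel-center conventions the shader expects with what the driver
    * supports.
    */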
4427
4428 if (fp->OriginUpperLeft) {
4429 /* Fragment shader wants origin in upper-left */
4430 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
4431 /* the driver supports upper-left origin */
4432 }
4433 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
4434 /* the driver supports lower-left origin, need to invert Y */
4435 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
4436 invert = TRUE;
4437 }
4438 else
4439 assert(0);
4440 }
4441 else {
4442 /* Fragment shader wants origin in lower-left */
4443 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
4444 /* the driver supports lower-left origin */
4445 ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
4446 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
4447 /* the driver supports upper-left origin, need to invert Y */
4448 invert = TRUE;
4449 else
4450 assert(0);
4451 }
4452
4453 if (fp->PixelCenterInteger) {
4454 /* Fragment shader wants pixel center integer */
4455 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER))
4456 /* the driver supports pixel center integer */
4457 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
4458 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER))
4459 /* the driver supports pixel center half integer, need to bias X,Y */
4460 emit_adjusted_wpos(t, program, 0.5f, invert ? 0.5f : -0.5f);
4461 else
4462 assert(0);
4463 }
4464 else {
4465 /* Fragment shader wants pixel center half integer */
4466 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
4467 /* the driver supports pixel center half integer */
4468 }
4469 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
4470 /* the driver supports pixel center integer, need to bias X,Y */
4471 ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
4472 emit_adjusted_wpos(t, program, 0.5f, invert ? -0.5f : 0.5f);
4473 }
4474 else
4475 assert(0);
4476 }
4477
4478    /* We invert after the adjustment so that we can reuse the adjustment ADD
4479     * and avoid an extra MOV into a temporary. */
4480 emit_wpos_inversion(t, program, invert);
4481 }
4482
4483 /**
4484 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back.
4485 * TGSI uses +1 for front, -1 for back.
4486 * This function converts the TGSI value to the GL value. Simply clamping/
4487 * saturating the value to [0,1] does the job.
4488 */
4489 static void
4490 emit_face_var(struct st_translate *t)
4491 {
4492 struct ureg_program *ureg = t->ureg;
4493 struct ureg_dst face_temp = ureg_DECL_temporary(ureg);
4494 struct ureg_src face_input = t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]];
4495
4496 /* MOV_SAT face_temp, input[face] */
4497 face_temp = ureg_saturate(face_temp);
4498 ureg_MOV(ureg, face_temp, face_input);
4499
4500 /* Use face_temp as face input from here on: */
4501 t->inputs[t->inputMapping[FRAG_ATTRIB_FACE]] = ureg_src(face_temp);
4502 }
4503
4504 static void
4505 emit_edgeflags(struct st_translate *t)
4506 {
4507 struct ureg_program *ureg = t->ureg;
4508 struct ureg_dst edge_dst = t->outputs[t->outputMapping[VERT_RESULT_EDGE]];
4509 struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]];
4510
4511 ureg_MOV(ureg, edge_dst, edge_src);
4512 }
4513
4514 /**
4515 * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
4516 * \param program the program to translate
4517 * \param numInputs number of input registers used
4518 * \param inputMapping maps Mesa fragment program inputs to TGSI generic
4519 * input indexes
4520 * \param inputSemanticName the TGSI_SEMANTIC flag for each input
4521 * \param inputSemanticIndex the semantic index (ex: which texcoord) for
4522 * each input
4523 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
4524 * \param numOutputs number of output registers used
4525 * \param outputMapping maps Mesa fragment program outputs to TGSI
4526 * generic outputs
4527 * \param outputSemanticName the TGSI_SEMANTIC flag for each output
4528 * \param outputSemanticIndex the semantic index (ex: which texcoord) for
4529 * each output
4530 *
4531 * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
4532 */
4533 extern "C" enum pipe_error
4534 st_translate_program(
4535 struct gl_context *ctx,
4536 uint procType,
4537 struct ureg_program *ureg,
4538 glsl_to_tgsi_visitor *program,
4539 const struct gl_program *proginfo,
4540 GLuint numInputs,
4541 const GLuint inputMapping[],
4542 const ubyte inputSemanticName[],
4543 const ubyte inputSemanticIndex[],
4544 const GLuint interpMode[],
4545 GLuint numOutputs,
4546 const GLuint outputMapping[],
4547 const ubyte outputSemanticName[],
4548 const ubyte outputSemanticIndex[],
4549 boolean passthrough_edgeflags)
4550 {
4551 struct st_translate translate, *t;
4552 unsigned i;
4553 enum pipe_error ret = PIPE_OK;
4554
4555 assert(numInputs <= Elements(t->inputs));
4556 assert(numOutputs <= Elements(t->outputs));
4557
4558 t = &translate;
4559 memset(t, 0, sizeof *t);
4560
4561 t->procType = procType;
4562 t->inputMapping = inputMapping;
4563 t->outputMapping = outputMapping;
4564 t->ureg = ureg;
4565 t->pointSizeOutIndex = -1;
4566 t->prevInstWrotePointSize = GL_FALSE;
4567
4568 /*
4569 * Declare input attributes.
4570 */
4571 if (procType == TGSI_PROCESSOR_FRAGMENT) {
4572 for (i = 0; i < numInputs; i++) {
4573 t->inputs[i] = ureg_DECL_fs_input(ureg,
4574 inputSemanticName[i],
4575 inputSemanticIndex[i],
4576 interpMode[i]);
4577 }
4578
4579 if (proginfo->InputsRead & FRAG_BIT_WPOS) {
4580 /* Must do this after setting up t->inputs, and before
4581 * emitting constant references, below:
4582 */
4583 emit_wpos(st_context(ctx), t, proginfo, ureg);
4584 }
4585
4586 if (proginfo->InputsRead & FRAG_BIT_FACE)
4587 emit_face_var(t);
4588
4589 /*
4590 * Declare output attributes.
4591 */
4592 for (i = 0; i < numOutputs; i++) {
4593 switch (outputSemanticName[i]) {
4594 case TGSI_SEMANTIC_POSITION:
4595 t->outputs[i] = ureg_DECL_output(ureg,
4596 TGSI_SEMANTIC_POSITION, /* Z/Depth */
4597 outputSemanticIndex[i]);
4598 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z);
4599 break;
4600 case TGSI_SEMANTIC_STENCIL:
4601 t->outputs[i] = ureg_DECL_output(ureg,
4602 TGSI_SEMANTIC_STENCIL, /* Stencil */
4603 outputSemanticIndex[i]);
4604 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y);
4605 break;
4606 case TGSI_SEMANTIC_COLOR:
4607 t->outputs[i] = ureg_DECL_output(ureg,
4608 TGSI_SEMANTIC_COLOR,
4609 outputSemanticIndex[i]);
4610 break;
4611 default:
4612 assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR");
4613 return PIPE_ERROR_BAD_INPUT;
4614 }
4615 }
4616 }
4617 else if (procType == TGSI_PROCESSOR_GEOMETRY) {
4618 for (i = 0; i < numInputs; i++) {
4619 t->inputs[i] = ureg_DECL_gs_input(ureg,
4620 i,
4621 inputSemanticName[i],
4622 inputSemanticIndex[i]);
4623 }
4624
4625 for (i = 0; i < numOutputs; i++) {
4626 t->outputs[i] = ureg_DECL_output(ureg,
4627 outputSemanticName[i],
4628 outputSemanticIndex[i]);
4629 }
4630 }
4631 else {
4632 assert(procType == TGSI_PROCESSOR_VERTEX);
4633
4634 for (i = 0; i < numInputs; i++) {
4635 t->inputs[i] = ureg_DECL_vs_input(ureg, i);
4636 }
4637
4638 for (i = 0; i < numOutputs; i++) {
4639 t->outputs[i] = ureg_DECL_output(ureg,
4640 outputSemanticName[i],
4641 outputSemanticIndex[i]);
4642 if ((outputSemanticName[i] == TGSI_SEMANTIC_PSIZE) && proginfo->Id) {
4643 /* Writing to the point size result register requires special
4644 * handling to implement clamping.
4645 */
4646 static const gl_state_index pointSizeClampState[STATE_LENGTH]
4647 = { STATE_INTERNAL, STATE_POINT_SIZE_IMPL_CLAMP, (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 };
4648 /* XXX: note we are modifying the incoming shader here! Need to
4649 * do this before emitting the constant decls below, or this
4650 * will be missed.
4651 */
4652 unsigned pointSizeClampConst =
4653 _mesa_add_state_reference(proginfo->Parameters,
4654 pointSizeClampState);
4655 struct ureg_dst psizregtemp = ureg_DECL_temporary(ureg);
4656 t->pointSizeConst = ureg_DECL_constant(ureg, pointSizeClampConst);
4657 t->pointSizeResult = t->outputs[i];
4658 t->pointSizeOutIndex = i;
4659 t->outputs[i] = psizregtemp;
4660 }
4661 }
4662 if (passthrough_edgeflags)
4663 emit_edgeflags(t);
4664 }
4665
4666 /* Declare address register.
4667 */
4668 if (program->num_address_regs > 0) {
4669 assert(program->num_address_regs == 1);
4670 t->address[0] = ureg_DECL_address(ureg);
4671 }
4672
4673 /* Declare misc input registers
4674 */
4675 {
4676 GLbitfield sysInputs = proginfo->SystemValuesRead;
4677 unsigned numSys = 0;
4678 for (i = 0; sysInputs; i++) {
4679 if (sysInputs & (1 << i)) {
4680 unsigned semName = mesa_sysval_to_semantic[i];
4681 t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0);
4682 numSys++;
4683 sysInputs &= ~(1 << i);
4684 }
4685 }
4686 }
4687
4688 if (program->indirect_addr_temps) {
4689 /* If temps are accessed with indirect addressing, declare temporaries
4690 * in sequential order. Else, we declare them on demand elsewhere.
4691 * (Note: the number of temporaries is equal to program->next_temp)
4692 */
4693 for (i = 0; i < (unsigned)program->next_temp; i++) {
4694 /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */
4695 t->temps[i] = ureg_DECL_temporary(t->ureg);
4696 }
4697 }
4698
4699 /* Emit constants and uniforms. TGSI uses a single index space for these,
4700 * so we put all the translated regs in t->constants.
4701 */
4702 if (proginfo->Parameters) {
4703 t->constants = (struct ureg_src *)CALLOC(proginfo->Parameters->NumParameters * sizeof(t->constants[0]));
4704 if (t->constants == NULL) {
4705 ret = PIPE_ERROR_OUT_OF_MEMORY;
4706 goto out;
4707 }
4708
4709 for (i = 0; i < proginfo->Parameters->NumParameters; i++) {
4710 switch (proginfo->Parameters->Parameters[i].Type) {
4711 case PROGRAM_ENV_PARAM:
4712 case PROGRAM_LOCAL_PARAM:
4713 case PROGRAM_STATE_VAR:
4714 case PROGRAM_NAMED_PARAM:
4715 case PROGRAM_UNIFORM:
4716 t->constants[i] = ureg_DECL_constant(ureg, i);
4717 break;
4718
4719 /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect
4720 * addressing of the const buffer.
4721 * FIXME: Be smarter and recognize param arrays:
4722 * indirect addressing is only valid within the referenced
4723 * array.
4724 */
4725 case PROGRAM_CONSTANT:
4726 if (program->indirect_addr_consts)
4727 t->constants[i] = ureg_DECL_constant(ureg, i);
4728 else
4729 t->constants[i] = emit_immediate(t,
4730 proginfo->Parameters->ParameterValues[i],
4731 proginfo->Parameters->Parameters[i].DataType,
4732 4);
4733 break;
4734 default:
4735 break;
4736 }
4737 }
4738 }
4739
4740 /* Emit immediate values.
4741 */
4742 t->immediates = (struct ureg_src *)CALLOC(program->num_immediates * sizeof(struct ureg_src));
4743 if (t->immediates == NULL) {
4744 ret = PIPE_ERROR_OUT_OF_MEMORY;
4745 goto out;
4746 }
4747 i = 0;
4748 foreach_iter(exec_list_iterator, iter, program->immediates) {
4749 immediate_storage *imm = (immediate_storage *)iter.get();
4750 t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size);
4751 }
4752
4753 /* texture samplers */
4754 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
4755 if (program->samplers_used & (1 << i)) {
4756 t->samplers[i] = ureg_DECL_sampler(ureg, i);
4757 }
4758 }
4759
4760 /* Emit each instruction in turn:
4761 */
4762 foreach_iter(exec_list_iterator, iter, program->instructions) {
4763 set_insn_start(t, ureg_get_instruction_number(ureg));
4764 compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get());
4765
4766 if (t->prevInstWrotePointSize && proginfo->Id) {
4767 /* The previous instruction wrote to the (fake) vertex point size
4768 * result register. Now we need to clamp that value to the min/max
4769 * point size range, putting the result into the real point size
4770 * register.
4771 * Note that we can't do this easily at the end of program due to
4772 * possible early return.
4773 */
4774 set_insn_start(t, ureg_get_instruction_number(ureg));
4775 ureg_MAX(t->ureg,
4776 ureg_writemask(t->outputs[t->pointSizeOutIndex], WRITEMASK_X),
4777 ureg_src(t->outputs[t->pointSizeOutIndex]),
4778 ureg_swizzle(t->pointSizeConst, 1,1,1,1));
4779 ureg_MIN(t->ureg, ureg_writemask(t->pointSizeResult, WRITEMASK_X),
4780 ureg_src(t->outputs[t->pointSizeOutIndex]),
4781 ureg_swizzle(t->pointSizeConst, 2,2,2,2));
4782 }
4783 t->prevInstWrotePointSize = GL_FALSE;
4784 }
4785
4786 /* Fix up all emitted labels:
4787 */
4788 for (i = 0; i < t->labels_count; i++) {
4789 ureg_fixup_label(ureg, t->labels[i].token,
4790 t->insn[t->labels[i].branch_target]);
4791 }
4792
4793 out:
4794 FREE(t->insn);
4795 FREE(t->labels);
4796 FREE(t->constants);
4797 FREE(t->immediates);
4798
4799 if (t->error) {
4800 debug_printf("%s: translate error flag set\n", __FUNCTION__);
4801 }
4802
4803 return ret;
4804 }
4805 /* ----------------------------- End TGSI code ------------------------------ */
4806
4807 /**
4808  * Convert a shader's GLSL IR into a Mesa gl_program, but without
4809 * generating Mesa IR.
4810 */
4811 static struct gl_program *
4812 get_mesa_program(struct gl_context *ctx,
4813 struct gl_shader_program *shader_program,
4814 struct gl_shader *shader)
4815 {
4816 glsl_to_tgsi_visitor* v = new glsl_to_tgsi_visitor();
4817 struct gl_program *prog;
4818 GLenum target;
4819 const char *target_string;
4820 bool progress;
4821 struct gl_shader_compiler_options *options =
4822 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)];
4823
4824 switch (shader->Type) {
4825 case GL_VERTEX_SHADER:
4826 target = GL_VERTEX_PROGRAM_ARB;
4827 target_string = "vertex";
4828 break;
4829 case GL_FRAGMENT_SHADER:
4830 target = GL_FRAGMENT_PROGRAM_ARB;
4831 target_string = "fragment";
4832 break;
4833 case GL_GEOMETRY_SHADER:
4834 target = GL_GEOMETRY_PROGRAM_NV;
4835 target_string = "geometry";
4836 break;
4837 default:
4838 assert(!"should not be reached");
4839 return NULL;
4840 }
4841
4842 validate_ir_tree(shader->ir);
4843
4844 prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
4845 if (!prog)
4846 return NULL;
4847 prog->Parameters = _mesa_new_parameter_list();
4848 prog->Varying = _mesa_new_parameter_list();
4849 prog->Attributes = _mesa_new_parameter_list();
4850 v->ctx = ctx;
4851 v->prog = prog;
4852 v->shader_program = shader_program;
4853 v->options = options;
4854 v->glsl_version = ctx->Const.GLSLVersion;
4855 v->native_integers = ctx->Const.NativeIntegers;
4856
4857 add_uniforms_to_parameters_list(shader_program, shader, prog);
4858
4859 /* Emit intermediate IR for main(). */
4860 visit_exec_list(shader->ir, v);
4861
4862 /* Now emit bodies for any functions that were used. */
4863 do {
4864 progress = GL_FALSE;
4865
4866 foreach_iter(exec_list_iterator, iter, v->function_signatures) {
4867 function_entry *entry = (function_entry *)iter.get();
4868
4869 if (!entry->bgn_inst) {
4870 v->current_function = entry;
4871
4872 entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB);
4873 entry->bgn_inst->function = entry;
4874
4875 visit_exec_list(&entry->sig->body, v);
4876
4877 glsl_to_tgsi_instruction *last;
4878 last = (glsl_to_tgsi_instruction *)v->instructions.get_tail();
4879 if (last->op != TGSI_OPCODE_RET)
4880 v->emit(NULL, TGSI_OPCODE_RET);
4881
4882 glsl_to_tgsi_instruction *end;
4883 end = v->emit(NULL, TGSI_OPCODE_ENDSUB);
4884 end->function = entry;
4885
4886 progress = GL_TRUE;
4887 }
4888 }
4889 } while (progress);
4890
4891 #if 0
4892 /* Print out some information (for debugging purposes) used by the
4893 * optimization passes. */
4894 for (i=0; i < v->next_temp; i++) {
4895 int fr = v->get_first_temp_read(i);
4896 int fw = v->get_first_temp_write(i);
4897 int lr = v->get_last_temp_read(i);
4898 int lw = v->get_last_temp_write(i);
4899
4900 printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw);
4901 assert(fw <= fr);
4902 }
4903 #endif
4904
4905 /* Remove reads to output registers, and to varyings in vertex shaders. */
4906 v->remove_output_reads(PROGRAM_OUTPUT);
4907 if (target == GL_VERTEX_PROGRAM_ARB)
4908 v->remove_output_reads(PROGRAM_VARYING);
4909
4910 /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
4911 v->simplify_cmp();
4912 v->copy_propagate();
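   /* Run advanced dead code elimination repeatedly until it stops making
    * progress.
    */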
4913 while (v->eliminate_dead_code_advanced());
4914
4915 /* FIXME: These passes to optimize temporary registers don't work when there
4916 * is indirect addressing of the temporary register space. We need proper
4917 * array support so that we don't have to give up these passes in every
4918 * shader that uses arrays.
4919 */
4920 if (!v->indirect_addr_temps) {
4921 v->eliminate_dead_code();
4922 v->merge_registers();
4923 v->renumber_registers();
4924 }
4925
4926 /* Write the END instruction. */
4927 v->emit(NULL, TGSI_OPCODE_END);
4928
4929 if (ctx->Shader.Flags & GLSL_DUMP) {
4930 printf("\n");
4931 printf("GLSL IR for linked %s program %d:\n", target_string,
4932 shader_program->Name);
4933 _mesa_print_ir(shader->ir, NULL);
4934 printf("\n");
4935 printf("\n");
4936 }
4937
4938 prog->Instructions = NULL;
4939 prog->NumInstructions = 0;
4940
4941 do_set_program_inouts(shader->ir, prog);
4942 count_resources(v, prog);
4943
4944 check_resources(ctx, shader_program, v, prog);
4945
4946 _mesa_reference_program(ctx, &shader->Program, prog);
4947
4948 struct st_vertex_program *stvp;
4949 struct st_fragment_program *stfp;
4950 struct st_geometry_program *stgp;
4951
4952 switch (shader->Type) {
4953 case GL_VERTEX_SHADER:
4954 stvp = (struct st_vertex_program *)prog;
4955 stvp->glsl_to_tgsi = v;
4956 break;
4957 case GL_FRAGMENT_SHADER:
4958 stfp = (struct st_fragment_program *)prog;
4959 stfp->glsl_to_tgsi = v;
4960 break;
4961 case GL_GEOMETRY_SHADER:
4962 stgp = (struct st_geometry_program *)prog;
4963 stgp->glsl_to_tgsi = v;
4964 break;
4965 default:
4966 assert(!"should not be reached");
4967 return NULL;
4968 }
4969
4970 return prog;
4971 }
4972
4973 extern "C" {
4974
4975 struct gl_shader *
4976 st_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
4977 {
4978 struct gl_shader *shader;
4979 assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER ||
4980 type == GL_GEOMETRY_SHADER_ARB);
4981 shader = rzalloc(NULL, struct gl_shader);
4982 if (shader) {
4983 shader->Type = type;
4984 shader->Name = name;
4985 _mesa_init_shader(ctx, shader);
4986 }
4987 return shader;
4988 }
4989
4990 struct gl_shader_program *
4991 st_new_shader_program(struct gl_context *ctx, GLuint name)
4992 {
4993 struct gl_shader_program *shProg;
4994 shProg = rzalloc(NULL, struct gl_shader_program);
4995 if (shProg) {
4996 shProg->Name = name;
4997 _mesa_init_shader_program(ctx, shProg);
4998 }
4999 return shProg;
5000 }
5001
5002 /**
5003 * Link a shader.
5004 * Called via ctx->Driver.LinkShader()
5005 * This actually involves converting GLSL IR into an intermediate TGSI-like IR
5006 * with code lowering and other optimizations.
5007 */
5008 GLboolean
5009 st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
5010 {
5011 assert(prog->LinkStatus);
5012
5013 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
5014 if (prog->_LinkedShaders[i] == NULL)
5015 continue;
5016
5017 bool progress;
5018 exec_list *ir = prog->_LinkedShaders[i]->ir;
5019 const struct gl_shader_compiler_options *options =
5020 &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)];
5021
5022 do {
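         /* Keep running the lowering passes until none of them reports progress. */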
5023 progress = false;
5024
5025 /* Lowering */
5026 do_mat_op_to_vec(ir);
5027 lower_instructions(ir, (MOD_TO_FRACT | DIV_TO_MUL_RCP | EXP_TO_EXP2
5028 | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP
5029 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
5030
5031 progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
5032
5033 progress = do_common_optimization(ir, true, options->MaxUnrollIterations) || progress;
5034
5035 progress = lower_quadop_vector(ir, false) || progress;
5036
5037 if (options->MaxIfDepth == 0)
5038 progress = lower_discard(ir) || progress;
5039
5040 progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress;
5041
5042 if (options->EmitNoNoise)
5043 progress = lower_noise(ir) || progress;
5044
5045 /* If there are forms of indirect addressing that the driver
5046 * cannot handle, perform the lowering pass.
5047 */
5048 if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput
5049 || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform)
5050 progress =
5051 lower_variable_index_to_cond_assign(ir,
5052 options->EmitNoIndirectInput,
5053 options->EmitNoIndirectOutput,
5054 options->EmitNoIndirectTemp,
5055 options->EmitNoIndirectUniform)
5056 || progress;
5057
5058 progress = do_vec_index_to_cond_assign(ir) || progress;
5059 } while (progress);
5060
5061 validate_ir_tree(ir);
5062 }
5063
5064 for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
5065 struct gl_program *linked_prog;
5066
5067 if (prog->_LinkedShaders[i] == NULL)
5068 continue;
5069
5070 linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
5071
5072 if (linked_prog) {
5073 bool ok = true;
5074
5075 switch (prog->_LinkedShaders[i]->Type) {
5076 case GL_VERTEX_SHADER:
5077 _mesa_reference_vertprog(ctx, &prog->VertexProgram,
5078 (struct gl_vertex_program *)linked_prog);
5079 ok = ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB,
5080 linked_prog);
5081 if (!ok) {
5082 _mesa_reference_vertprog(ctx, &prog->VertexProgram, NULL);
5083 }
5084 break;
5085 case GL_FRAGMENT_SHADER:
5086 _mesa_reference_fragprog(ctx, &prog->FragmentProgram,
5087 (struct gl_fragment_program *)linked_prog);
5088 ok = ctx->Driver.ProgramStringNotify(ctx, GL_FRAGMENT_PROGRAM_ARB,
5089 linked_prog);
5090 if (!ok) {
5091 _mesa_reference_fragprog(ctx, &prog->FragmentProgram, NULL);
5092 }
5093 break;
5094 case GL_GEOMETRY_SHADER:
5095 _mesa_reference_geomprog(ctx, &prog->GeometryProgram,
5096 (struct gl_geometry_program *)linked_prog);
5097 ok = ctx->Driver.ProgramStringNotify(ctx, GL_GEOMETRY_PROGRAM_NV,
5098 linked_prog);
5099 if (!ok) {
5100 _mesa_reference_geomprog(ctx, &prog->GeometryProgram, NULL);
5101 }
5102 break;
5103 }
5104 if (!ok) {
5105 _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program, NULL);
5106 _mesa_reference_program(ctx, &linked_prog, NULL);
5107 return GL_FALSE;
5108 }
5109 }
5110
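      /* Release our local reference to the program; any references needed by
       * the shader program or the driver were taken above.
       */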
5111 _mesa_reference_program(ctx, &linked_prog, NULL);
5112 }
5113
5114 return GL_TRUE;
5115 }
5116
5117 } /* extern "C" */