9174b41d8c9e1279e6ffaf611ab7fc780cff926c
[mesa.git] / src / mesa / state_tracker / st_glsl_to_tgsi.cpp
1 /*
2 * Copyright (C) 2005-2007 Brian Paul All Rights Reserved.
3 * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
4 * Copyright © 2010 Intel Corporation
5 * Copyright © 2011 Bryan Cain
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 * DEALINGS IN THE SOFTWARE.
25 */
26
27 /**
28 * \file glsl_to_tgsi.cpp
29 *
30 * Translate GLSL IR to TGSI.
31 */
32
33 #include "st_glsl_to_tgsi.h"
34
35 #include "glsl_parser_extras.h"
36 #include "ir_optimization.h"
37
38 #include "main/errors.h"
39 #include "main/shaderobj.h"
40 #include "main/uniforms.h"
41 #include "main/shaderapi.h"
42 #include "program/prog_instruction.h"
43 #include "program/sampler.h"
44
45 #include "pipe/p_context.h"
46 #include "pipe/p_screen.h"
47 #include "tgsi/tgsi_ureg.h"
48 #include "tgsi/tgsi_info.h"
49 #include "util/u_math.h"
50 #include "util/u_memory.h"
51 #include "st_program.h"
52 #include "st_mesa_to_tgsi.h"
53
54
55 #define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX
56 #define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) | \
57 (1 << PROGRAM_CONSTANT) | \
58 (1 << PROGRAM_UNIFORM))
59
60 #define MAX_GLSL_TEXTURE_OFFSET 4
61
62 class st_src_reg;
63 class st_dst_reg;
64
65 static int swizzle_for_size(int size);
66
/**
 * This struct is a corresponding struct to TGSI ureg_src.
 *
 * Carries the Mesa register file/index, a swizzle/negate pair, up to two
 * levels of relative addressing, and the GLSL base type so later passes
 * can choose typed opcodes (see get_opcode()).
 */
class st_src_reg {
public:
   /** Construct from a GLSL type; the swizzle is sized to the vector width. */
   st_src_reg(gl_register_file file, int index, const glsl_type *type)
   {
      this->file = file;
      this->index = index;
      /* Scalars/vectors/matrices get a swizzle trimmed to their element
       * count; anything else (including a NULL type) reads all of XYZW.
       */
      if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
         this->swizzle = swizzle_for_size(type->vector_elements);
      else
         this->swizzle = SWIZZLE_XYZW;
      this->negate = 0;
      this->index2D = 0;
      this->type = type ? type->base_type : GLSL_TYPE_ERROR;
      this->reladdr = NULL;
      this->reladdr2 = NULL;
      this->has_index2 = false;
      this->double_reg2 = false;
      this->array_id = 0;
   }

   /** Construct with an explicit GLSL_TYPE_* base type and identity swizzle. */
   st_src_reg(gl_register_file file, int index, int type)
   {
      this->type = type;
      this->file = file;
      this->index = index;
      this->index2D = 0;
      this->swizzle = SWIZZLE_XYZW;
      this->negate = 0;
      this->reladdr = NULL;
      this->reladdr2 = NULL;
      this->has_index2 = false;
      this->double_reg2 = false;
      this->array_id = 0;
   }

   /** Same as above, but with a second (2D) register index. */
   st_src_reg(gl_register_file file, int index, int type, int index2D)
   {
      this->type = type;
      this->file = file;
      this->index = index;
      this->index2D = index2D;
      this->swizzle = SWIZZLE_XYZW;
      this->negate = 0;
      this->reladdr = NULL;
      this->reladdr2 = NULL;
      this->has_index2 = false;
      this->double_reg2 = false;
      this->array_id = 0;
   }

   /** Default: an undefined register with no swizzle. */
   st_src_reg()
   {
      this->type = GLSL_TYPE_ERROR;
      this->file = PROGRAM_UNDEFINED;
      this->index = 0;
      this->index2D = 0;
      this->swizzle = 0;
      this->negate = 0;
      this->reladdr = NULL;
      this->reladdr2 = NULL;
      this->has_index2 = false;
      this->double_reg2 = false;
      this->array_id = 0;
   }

   explicit st_src_reg(st_dst_reg reg);

   gl_register_file file; /**< PROGRAM_* from Mesa */
   int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
   int index2D;
   GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
   int negate; /**< NEGATE_XYZW mask from mesa */
   int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
   /** Register index should be offset by the integer in this reg. */
   st_src_reg *reladdr;
   st_src_reg *reladdr2;
   bool has_index2;
   /*
    * Is this the second half of a double register pair?
    * currently used for input mapping only.
    */
   bool double_reg2;
   unsigned array_id;
};
154
/**
 * Destination-register counterpart to st_src_reg (mirrors TGSI ureg_dst):
 * a writemask instead of a swizzle, plus a condition mask.
 */
class st_dst_reg {
public:
   /** Construct with an explicit register index. */
   st_dst_reg(gl_register_file file, int writemask, int type, int index)
   {
      this->file = file;
      this->index = index;
      this->index2D = 0;
      this->writemask = writemask;
      this->cond_mask = COND_TR;
      this->reladdr = NULL;
      this->reladdr2 = NULL;
      this->has_index2 = false;
      this->type = type;
      this->array_id = 0;
   }

   /** Construct with index 0 (caller fills in the index later). */
   st_dst_reg(gl_register_file file, int writemask, int type)
   {
      this->file = file;
      this->index = 0;
      this->index2D = 0;
      this->writemask = writemask;
      this->cond_mask = COND_TR;
      this->reladdr = NULL;
      this->reladdr2 = NULL;
      this->has_index2 = false;
      this->type = type;
      this->array_id = 0;
   }

   /** Default: an undefined register writing nothing. */
   st_dst_reg()
   {
      this->type = GLSL_TYPE_ERROR;
      this->file = PROGRAM_UNDEFINED;
      this->index = 0;
      this->index2D = 0;
      this->writemask = 0;
      this->cond_mask = COND_TR;
      this->reladdr = NULL;
      this->reladdr2 = NULL;
      this->has_index2 = false;
      this->array_id = 0;
   }

   explicit st_dst_reg(st_src_reg reg);

   gl_register_file file; /**< PROGRAM_* from Mesa */
   int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
   int index2D;
   int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
   GLuint cond_mask:4;
   int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
   /** Register index should be offset by the integer in this reg. */
   st_src_reg *reladdr;
   st_src_reg *reladdr2;
   bool has_index2;
   unsigned array_id;
};
213
214 st_src_reg::st_src_reg(st_dst_reg reg)
215 {
216 this->type = reg.type;
217 this->file = reg.file;
218 this->index = reg.index;
219 this->swizzle = SWIZZLE_XYZW;
220 this->negate = 0;
221 this->reladdr = reg.reladdr;
222 this->index2D = reg.index2D;
223 this->reladdr2 = reg.reladdr2;
224 this->has_index2 = reg.has_index2;
225 this->double_reg2 = false;
226 this->array_id = reg.array_id;
227 }
228
229 st_dst_reg::st_dst_reg(st_src_reg reg)
230 {
231 this->type = reg.type;
232 this->file = reg.file;
233 this->index = reg.index;
234 this->writemask = WRITEMASK_XYZW;
235 this->cond_mask = COND_TR;
236 this->reladdr = reg.reladdr;
237 this->index2D = reg.index2D;
238 this->reladdr2 = reg.reladdr2;
239 this->has_index2 = reg.has_index2;
240 this->array_id = reg.array_id;
241 }
242
/**
 * One IR instruction in the intermediate list built by
 * glsl_to_tgsi_visitor before final TGSI emission.  Up to two
 * destinations and four sources, plus texture-specific state.
 */
class glsl_to_tgsi_instruction : public exec_node {
public:
   DECLARE_RALLOC_CXX_OPERATORS(glsl_to_tgsi_instruction)

   unsigned op;             /**< TGSI_OPCODE_* */
   st_dst_reg dst[2];
   st_src_reg src[4];
   /** Pointer to the ir source this tree came from for debugging */
   ir_instruction *ir;
   GLboolean cond_update;
   bool saturate;
   st_src_reg sampler; /**< sampler register */
   int sampler_array_size; /**< 1-based size of sampler array, 1 if not array */
   int tex_target; /**< One of TEXTURE_*_INDEX */
   glsl_base_type tex_type; /**< defaulted to GLSL_TYPE_FLOAT in emit_asm() */
   GLboolean tex_shadow;

   st_src_reg tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
   unsigned tex_offset_num_offset;
   int dead_mask; /**< Used in dead code elimination */

   class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
   const struct tgsi_opcode_info *info; /**< cached tgsi_get_opcode_info(op) */
};
267
/**
 * Maps an ir_variable to the Mesa register (file + index) that backs it.
 * Kept in the visitor's `variables` exec_list.
 */
class variable_storage : public exec_node {
public:
   variable_storage(ir_variable *var, gl_register_file file, int index,
                    unsigned array_id = 0)
      : file(file), index(index), var(var), array_id(array_id)
   {
      /* empty */
   }

   gl_register_file file;   /**< PROGRAM_* register file */
   int index;               /**< base register index within the file */
   ir_variable *var; /* variable that maps to this, if any */
   unsigned array_id;       /**< 0 when the storage is not a declared array */
};
282
/**
 * One vec4-sized slot of immediate constant data.  Larger values (e.g.
 * dvec3/dvec4) span several consecutive list entries; see add_constant().
 */
class immediate_storage : public exec_node {
public:
   /** Copies `size32` 32-bit components out of `values`. */
   immediate_storage(gl_constant_value *values, int size32, int type)
   {
      memcpy(this->values, values, size32 * sizeof(gl_constant_value));
      this->size32 = size32;
      this->type = type;
   }

   /* doubles are stored across 2 gl_constant_values */
   gl_constant_value values[4];
   int size32; /**< Number of 32-bit components (1-4) */
   int type; /**< GL_DOUBLE, GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
};
297
/**
 * Bookkeeping for one GLSL function signature: links the IR signature to
 * the emitted instruction stream so calls can be patched up after the
 * whole program is generated.
 */
class function_entry : public exec_node {
public:
   ir_function_signature *sig;

   /**
    * identifier of this function signature used by the program.
    *
    * At the point that TGSI instructions for function calls are
    * generated, we don't know the address of the first instruction of
    * the function body. So we make the BranchTarget that is called a
    * small integer and rewrite them during set_branchtargets().
    */
   int sig_id;

   /**
    * Pointer to first instruction of the function body.
    *
    * Set during function body emits after main() is processed.
    */
   glsl_to_tgsi_instruction *bgn_inst;

   /**
    * Index of the first instruction of the function body in actual TGSI.
    *
    * Set after conversion from glsl_to_tgsi_instruction to TGSI.
    */
   int inst;

   /** Storage for the return value. */
   st_src_reg return_reg;
};
329
/* Canonical "no operand" placeholders; used as the default arguments of
 * emit_asm() so callers can omit unused sources/destinations.
 */
static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
332
/** Describes one declared input/output array (see input_arrays/output_arrays). */
struct array_decl {
   unsigned mesa_index; /**< base Mesa register index of the array */
   unsigned array_id;   /**< TGSI declaration array id */
   unsigned array_size; /**< number of elements */
};
338
/**
 * Main GLSL IR -> glsl_to_tgsi_instruction translator.
 *
 * Walks the IR with the visitor pattern, accumulating instructions in
 * `instructions`; separate passes afterwards optimize (copy propagation,
 * dead code elimination, register merging/renumbering) before final TGSI
 * emission.
 */
struct glsl_to_tgsi_visitor : public ir_visitor {
public:
   glsl_to_tgsi_visitor();
   ~glsl_to_tgsi_visitor();

   function_entry *current_function;

   struct gl_context *ctx;
   struct gl_program *prog;
   struct gl_shader_program *shader_program;
   struct gl_shader *shader;
   struct gl_shader_compiler_options *options;

   int next_temp;

   unsigned *array_sizes;
   unsigned max_num_arrays;
   unsigned next_array;

   struct array_decl input_arrays[PIPE_MAX_SHADER_INPUTS];
   unsigned num_input_arrays;
   struct array_decl output_arrays[PIPE_MAX_SHADER_OUTPUTS];
   unsigned num_output_arrays;

   int num_address_regs;  /* highest address register index used + 1 (see emit_arl) */
   int samplers_used;
   glsl_base_type sampler_types[PIPE_MAX_SAMPLERS];
   int sampler_targets[PIPE_MAX_SAMPLERS];   /**< One of TGSI_TEXTURE_* */
   bool indirect_addr_consts; /* set when constants are accessed via reladdr (see emit_asm) */
   int wpos_transform_const;

   int glsl_version;
   bool native_integers;
   bool have_sqrt;
   bool have_fma;

   variable_storage *find_variable_storage(ir_variable *var);

   int add_constant(gl_register_file file, gl_constant_value values[8],
                    int size, int datatype, GLuint *swizzle_out);

   function_entry *get_function_signature(ir_function_signature *sig);

   st_src_reg get_temp(const glsl_type *type);
   void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);

   /* Helpers building immediate-file source registers of the given value. */
   st_src_reg st_src_reg_for_double(double val);
   st_src_reg st_src_reg_for_float(float val);
   st_src_reg st_src_reg_for_int(int val);
   st_src_reg st_src_reg_for_type(int type, int val);

   /**
    * \name Visit methods
    *
    * As typical for the visitor pattern, there must be one \c visit method for
    * each concrete subclass of \c ir_instruction.  Virtual base classes within
    * the hierarchy should not have \c visit methods.
    */
   /*@{*/
   virtual void visit(ir_variable *);
   virtual void visit(ir_loop *);
   virtual void visit(ir_loop_jump *);
   virtual void visit(ir_function_signature *);
   virtual void visit(ir_function *);
   virtual void visit(ir_expression *);
   virtual void visit(ir_swizzle *);
   virtual void visit(ir_dereference_variable  *);
   virtual void visit(ir_dereference_array *);
   virtual void visit(ir_dereference_record *);
   virtual void visit(ir_assignment *);
   virtual void visit(ir_constant *);
   virtual void visit(ir_call *);
   virtual void visit(ir_return *);
   virtual void visit(ir_discard *);
   virtual void visit(ir_texture *);
   virtual void visit(ir_if *);
   virtual void visit(ir_emit_vertex *);
   virtual void visit(ir_end_primitive *);
   virtual void visit(ir_barrier *);
   /*@}*/

   st_src_reg result; /* value produced by the most recent rvalue visit */

   /** List of variable_storage */
   exec_list variables;

   /** List of immediate_storage */
   exec_list immediates;
   unsigned num_immediates;

   /** List of function_entry */
   exec_list function_signatures;
   int next_signature_id;

   /** List of glsl_to_tgsi_instruction */
   exec_list instructions;

   glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op,
                                      st_dst_reg dst = undef_dst,
                                      st_src_reg src0 = undef_src,
                                      st_src_reg src1 = undef_src,
                                      st_src_reg src2 = undef_src,
                                      st_src_reg src3 = undef_src);

   glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op,
                                      st_dst_reg dst, st_dst_reg dst1,
                                      st_src_reg src0 = undef_src,
                                      st_src_reg src1 = undef_src,
                                      st_src_reg src2 = undef_src,
                                      st_src_reg src3 = undef_src);

   unsigned get_opcode(ir_instruction *ir, unsigned op,
                       st_dst_reg dst,
                       st_src_reg src0, st_src_reg src1);

   /**
    * Emit the correct dot-product instruction for the type of arguments
    */
   glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir,
                                     st_dst_reg dst,
                                     st_src_reg src0,
                                     st_src_reg src1,
                                     unsigned elements);

   void emit_scalar(ir_instruction *ir, unsigned op,
                    st_dst_reg dst, st_src_reg src0);

   void emit_scalar(ir_instruction *ir, unsigned op,
                    st_dst_reg dst, st_src_reg src0, st_src_reg src1);

   void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);

   bool try_emit_mad(ir_expression *ir,
                     int mul_operand);
   bool try_emit_mad_for_and_not(ir_expression *ir,
                                 int mul_operand);

   void emit_swz(ir_expression *ir);

   bool process_move_condition(ir_rvalue *ir);

   void simplify_cmp(void);

   /* Register-lifetime analysis helpers used by the optimization passes. */
   void rename_temp_register(int index, int new_index);
   void get_first_temp_read(int *first_reads);
   void get_last_temp_read_first_temp_write(int *last_reads, int *first_writes);
   void get_last_temp_write(int *last_writes);

   void copy_propagate(void);
   int eliminate_dead_code(void);

   void merge_two_dsts(void);
   void merge_registers(void);
   void renumber_registers(void);

   void emit_block_mov(ir_assignment *ir, const struct glsl_type *type,
                       st_dst_reg *l, st_src_reg *r,
                       st_src_reg *cond, bool cond_swap);

   void *mem_ctx; /* ralloc context owning the instruction list */
};
500
/* Dedicated address registers: index 0 for ordinary reladdr, index 1 for
 * the second-dimension reladdr2 (see emit_asm), index 2 presumably for
 * indirect sampler indexing — confirm against sampler handling elsewhere.
 */
static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 0);
static st_dst_reg address_reg2 = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 1);
static st_dst_reg sampler_reladdr = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 2);
504
505 static void
506 fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
507
508 static void
509 fail_link(struct gl_shader_program *prog, const char *fmt, ...)
510 {
511 va_list args;
512 va_start(args, fmt);
513 ralloc_vasprintf_append(&prog->InfoLog, fmt, args);
514 va_end(args);
515
516 prog->LinkStatus = GL_FALSE;
517 }
518
519 static int
520 swizzle_for_size(int size)
521 {
522 static const int size_swizzles[4] = {
523 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
524 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
525 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
526 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
527 };
528
529 assert((size >= 1) && (size <= 4));
530 return size_swizzles[size - 1];
531 }
532
533 static unsigned
534 num_inst_dst_regs(const glsl_to_tgsi_instruction *op)
535 {
536 return op->info->num_dst;
537 }
538
539 static unsigned
540 num_inst_src_regs(const glsl_to_tgsi_instruction *op)
541 {
542 return op->info->is_tex ? op->info->num_src - 1 : op->info->num_src;
543 }
544
/**
 * Build one glsl_to_tgsi_instruction (two destinations, four sources),
 * append it to the instruction list, and return it.
 *
 * Resolves the typed opcode via get_opcode(), materializes all relative
 * addressing through the ARL registers, and — when any operand is a
 * double — splits the instruction into one instruction per written
 * channel to map GLSL's one-channel doubles onto TGSI's two-channel
 * double register pairs.
 */
glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
                               st_dst_reg dst, st_dst_reg dst1,
                               st_src_reg src0, st_src_reg src1,
                               st_src_reg src2, st_src_reg src3)
{
   glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
   int num_reladdr = 0, i, j;

   op = get_opcode(ir, op, dst, src0, src1);

   /* If we have to do relative addressing, we want to load the ARL
    * reg directly for one of the regs, and preload the other reladdr
    * sources into temps.
    */
   num_reladdr += dst.reladdr != NULL || dst.reladdr2;
   num_reladdr += dst1.reladdr != NULL || dst1.reladdr2;
   num_reladdr += src0.reladdr != NULL || src0.reladdr2 != NULL;
   num_reladdr += src1.reladdr != NULL || src1.reladdr2 != NULL;
   num_reladdr += src2.reladdr != NULL || src2.reladdr2 != NULL;
   num_reladdr += src3.reladdr != NULL || src3.reladdr2 != NULL;

   /* reladdr_to_temp() only spills while more than one reladdr remains,
    * so the last one is left to use the address register directly.
    */
   reladdr_to_temp(ir, &src3, &num_reladdr);
   reladdr_to_temp(ir, &src2, &num_reladdr);
   reladdr_to_temp(ir, &src1, &num_reladdr);
   reladdr_to_temp(ir, &src0, &num_reladdr);

   if (dst.reladdr || dst.reladdr2) {
      if (dst.reladdr)
         emit_arl(ir, address_reg, *dst.reladdr);
      if (dst.reladdr2)
         emit_arl(ir, address_reg2, *dst.reladdr2);
      num_reladdr--;
   }
   if (dst1.reladdr) {
      emit_arl(ir, address_reg, *dst1.reladdr);
      num_reladdr--;
   }
   assert(num_reladdr == 0);

   inst->op = op;
   inst->info = tgsi_get_opcode_info(op);
   inst->dst[0] = dst;
   inst->dst[1] = dst1;
   inst->src[0] = src0;
   inst->src[1] = src1;
   inst->src[2] = src2;
   inst->src[3] = src3;
   inst->ir = ir;
   inst->dead_mask = 0;
   /* default to float, for paths where this is not initialized
    * (since 0==UINT which is likely wrong):
    */
   inst->tex_type = GLSL_TYPE_FLOAT;

   inst->function = NULL;

   /* Update indirect addressing status used by TGSI */
   if (dst.reladdr || dst.reladdr2) {
      switch(dst.file) {
      case PROGRAM_STATE_VAR:
      case PROGRAM_CONSTANT:
      case PROGRAM_UNIFORM:
         this->indirect_addr_consts = true;
         break;
      case PROGRAM_IMMEDIATE:
         assert(!"immediates should not have indirect addressing");
         break;
      default:
         break;
      }
   }
   else {
      for (i = 0; i < 4; i++) {
         if(inst->src[i].reladdr) {
            switch(inst->src[i].file) {
            case PROGRAM_STATE_VAR:
            case PROGRAM_CONSTANT:
            case PROGRAM_UNIFORM:
               this->indirect_addr_consts = true;
               break;
            case PROGRAM_IMMEDIATE:
               assert(!"immediates should not have indirect addressing");
               break;
            default:
               break;
            }
         }
      }
   }

   this->instructions.push_tail(inst);

   /*
    * This section contains the double processing.
    * GLSL just represents doubles as single channel values,
    * however most HW and TGSI represent doubles as pairs of register channels.
    *
    * so we have to fixup destination writemask/index and src swizzle/indexes.
    * dest writemasks need to translate from single channel write mask
    * to a dual-channel writemask, but also need to modify the index,
    * if we are touching the Z,W fields in the pre-translated writemask.
    *
    * src channels have similiar index modifications along with swizzle
    * changes to we pick the XY, ZW pairs from the correct index.
    *
    * GLSL [0].x -> TGSI [0].xy
    * GLSL [0].y -> TGSI [0].zw
    * GLSL [0].z -> TGSI [1].xy
    * GLSL [0].w -> TGSI [1].zw
    */
   if (inst->dst[0].type == GLSL_TYPE_DOUBLE || inst->dst[1].type == GLSL_TYPE_DOUBLE ||
       inst->src[0].type == GLSL_TYPE_DOUBLE) {
      glsl_to_tgsi_instruction *dinst = NULL;
      int initial_src_swz[4], initial_src_idx[4];
      int initial_dst_idx[2], initial_dst_writemask[2];
      /* select the writemask for dst0 or dst1 */
      unsigned writemask = inst->dst[0].file == PROGRAM_UNDEFINED ? inst->dst[1].writemask : inst->dst[0].writemask;

      /* copy out the writemask, index and swizzles for all src/dsts. */
      for (j = 0; j < 2; j++) {
         initial_dst_writemask[j] = inst->dst[j].writemask;
         initial_dst_idx[j] = inst->dst[j].index;
      }

      for (j = 0; j < 4; j++) {
         initial_src_swz[j] = inst->src[j].swizzle;
         initial_src_idx[j] = inst->src[j].index;
      }

      /*
       * scan all the components in the dst writemask
       * generate an instruction for each of them if required.
       */
      while (writemask) {

         int i = u_bit_scan(&writemask);

         /* first time use previous instruction */
         if (dinst == NULL) {
            dinst = inst;
         } else {
            /* create a new instructions for subsequent attempts */
            dinst = new(mem_ctx) glsl_to_tgsi_instruction();
            *dinst = *inst;
            dinst->next = NULL;
            dinst->prev = NULL;
            this->instructions.push_tail(dinst);
         }

         /* modify the destination if we are splitting */
         for (j = 0; j < 2; j++) {
            if (dinst->dst[j].type == GLSL_TYPE_DOUBLE) {
               dinst->dst[j].writemask = (i & 1) ? WRITEMASK_ZW : WRITEMASK_XY;
               dinst->dst[j].index = initial_dst_idx[j];
               /* channels Z/W live in the next vec4 slot */
               if (i > 1)
                  dinst->dst[j].index++;
            } else {
               /* if we aren't writing to a double, just get the bit of the initial writemask
                  for this channel */
               dinst->dst[j].writemask = initial_dst_writemask[j] & (1 << i);
            }
         }

         /* modify the src registers */
         for (j = 0; j < 4; j++) {
            int swz = GET_SWZ(initial_src_swz[j], i);

            if (dinst->src[j].type == GLSL_TYPE_DOUBLE) {
               dinst->src[j].index = initial_src_idx[j];
               /* pre-translated swizzle Z/W reads from the next slot */
               if (swz > 1) {
                  dinst->src[j].double_reg2 = true;
                  dinst->src[j].index++;
               }

               if (swz & 1)
                  dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
               else
                  dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);

            } else {
               /* some opcodes are special case in what they use as sources
                  - F2D is a float src0, DLDEXP is integer src1 */
               if (op == TGSI_OPCODE_F2D ||
                   op == TGSI_OPCODE_DLDEXP ||
                   (op == TGSI_OPCODE_UCMP && dinst->dst[0].type == GLSL_TYPE_DOUBLE)) {
                  dinst->src[j].swizzle = MAKE_SWIZZLE4(swz, swz, swz, swz);
               }
            }
         }
      }
      inst = dinst;
   }


   return inst;
}
742
/** Single-destination convenience wrapper around the two-destination emit_asm(). */
glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
                               st_dst_reg dst,
                               st_src_reg src0, st_src_reg src1,
                               st_src_reg src2, st_src_reg src3)
{
   return emit_asm(ir, op, dst, undef_dst, src0, src1, src2, src3);
}
751
/**
 * Determines whether to use an integer, unsigned integer, float, or double
 * opcode based on the operand types and the requested generic opcode.
 *
 * The case* macros expand to switch cases mapping a generic opcode to its
 * typed variant for the deduced `type`; opcodes not listed pass through
 * unchanged.
 */
unsigned
glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
                                 st_dst_reg dst,
                                 st_src_reg src0, st_src_reg src1)
{
   int type = GLSL_TYPE_FLOAT;

   /* MOV is type-agnostic; nothing to resolve. */
   if (op == TGSI_OPCODE_MOV)
       return op;

   assert(src0.type != GLSL_TYPE_ARRAY);
   assert(src0.type != GLSL_TYPE_STRUCT);
   assert(src1.type != GLSL_TYPE_ARRAY);
   assert(src1.type != GLSL_TYPE_STRUCT);

   /* Doubles dominate, then floats; otherwise take src0's integer type
    * (bools behave as ints) when the driver has native integers.
    */
   if (src0.type == GLSL_TYPE_DOUBLE || src1.type == GLSL_TYPE_DOUBLE)
      type = GLSL_TYPE_DOUBLE;
   else if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
      type = GLSL_TYPE_FLOAT;
   else if (native_integers)
      type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type;

#define case5(c, f, i, u, d)                    \
   case TGSI_OPCODE_##c: \
      if (type == GLSL_TYPE_DOUBLE)           \
         op = TGSI_OPCODE_##d; \
      else if (type == GLSL_TYPE_INT)       \
         op = TGSI_OPCODE_##i; \
      else if (type == GLSL_TYPE_UINT) \
         op = TGSI_OPCODE_##u; \
      else \
         op = TGSI_OPCODE_##f; \
      break;

#define case4(c, f, i, u) \
   case TGSI_OPCODE_##c: \
      if (type == GLSL_TYPE_INT) \
         op = TGSI_OPCODE_##i; \
      else if (type == GLSL_TYPE_UINT) \
         op = TGSI_OPCODE_##u; \
      else \
         op = TGSI_OPCODE_##f; \
      break;

#define case3(f, i, u)  case4(f, f, i, u)
#define case4d(f, i, u, d)  case5(f, f, i, u, d)
#define case3fid(f, i, d) case5(f, f, i, i, d)
#define case2fi(f, i)   case4(f, f, i, i)
#define case2iu(i, u)   case4(i, LAST, i, u)

/* Comparison opcodes additionally fall back to the untyped form when the
 * driver lacks native integers.
 */
#define casecomp(c, f, i, u, d)                   \
   case TGSI_OPCODE_##c: \
      if (type == GLSL_TYPE_DOUBLE) \
         op = TGSI_OPCODE_##d; \
      else if (type == GLSL_TYPE_INT || type == GLSL_TYPE_SUBROUTINE)       \
         op = TGSI_OPCODE_##i; \
      else if (type == GLSL_TYPE_UINT) \
         op = TGSI_OPCODE_##u; \
      else if (native_integers) \
         op = TGSI_OPCODE_##f; \
      else \
         op = TGSI_OPCODE_##c; \
      break;

   switch(op) {
      case3fid(ADD, UADD, DADD);
      case3fid(MUL, UMUL, DMUL);
      case3fid(MAD, UMAD, DMAD);
      case3fid(FMA, UMAD, DFMA);
      case3(DIV, IDIV, UDIV);
      case4d(MAX, IMAX, UMAX, DMAX);
      case4d(MIN, IMIN, UMIN, DMIN);
      case2iu(MOD, UMOD);

      casecomp(SEQ, FSEQ, USEQ, USEQ, DSEQ);
      casecomp(SNE, FSNE, USNE, USNE, DSNE);
      casecomp(SGE, FSGE, ISGE, USGE, DSGE);
      casecomp(SLT, FSLT, ISLT, USLT, DSLT);

      case2iu(ISHR, USHR);

      case3fid(SSG, ISSG, DSSG);
      case3fid(ABS, IABS, DABS);

      case2iu(IBFE, UBFE);
      case2iu(IMSB, UMSB);
      case2iu(IMUL_HI, UMUL_HI);

      case3fid(SQRT, SQRT, DSQRT);

      case3fid(RCP, RCP, DRCP);
      case3fid(RSQ, RSQ, DRSQ);

      case3fid(FRC, FRC, DFRAC);
      case3fid(TRUNC, TRUNC, DTRUNC);
      case3fid(CEIL, CEIL, DCEIL);
      case3fid(FLR, FLR, DFLR);
      case3fid(ROUND, ROUND, DROUND);

      default: break;
   }

   /* case2iu maps the missing combination to LAST; catch it here. */
   assert(op != TGSI_OPCODE_LAST);
   return op;
}
861
862 glsl_to_tgsi_instruction *
863 glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
864 st_dst_reg dst, st_src_reg src0, st_src_reg src1,
865 unsigned elements)
866 {
867 static const unsigned dot_opcodes[] = {
868 TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
869 };
870
871 return emit_asm(ir, dot_opcodes[elements - 2], dst, src0, src1);
872 }
873
/**
 * Emits TGSI scalar opcodes to produce unique answers across channels.
 *
 * Some TGSI opcodes are scalar-only, like ARB_fp/vp.  The src X
 * channel determines the result across all channels.  So to do a vec4
 * of this operation, we want to emit a scalar per source channel used
 * to produce dest channels.
 */
void
glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
                                  st_dst_reg dst,
                                  st_src_reg orig_src0, st_src_reg orig_src1)
{
   int i, j;
   /* Channels not requested by dst's writemask start out "done". */
   int done_mask = ~dst.writemask;

   /* TGSI RCP is a scalar operation splatting results to all channels,
    * like ARB_fp/vp.  So emit as many RCPs as necessary to cover our
    * dst channels.
    */
   for (i = 0; i < 4; i++) {
      GLuint this_mask = (1 << i);
      st_src_reg src0 = orig_src0;
      st_src_reg src1 = orig_src1;

      if (done_mask & this_mask)
         continue;

      GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
      GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
      for (j = i + 1; j < 4; j++) {
         /* If there is another enabled component in the destination that is
          * derived from the same inputs, generate its value on this pass as
          * well.
          */
         if (!(done_mask & (1 << j)) &&
             GET_SWZ(src0.swizzle, j) == src0_swiz &&
             GET_SWZ(src1.swizzle, j) == src1_swiz) {
            this_mask |= (1 << j);
         }
      }
      /* Splat the chosen source channel; the scalar op broadcasts anyway. */
      src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
                                   src0_swiz, src0_swiz);
      src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
                                   src1_swiz, src1_swiz);

      dst.writemask = this_mask;
      emit_asm(ir, op, dst, src0, src1);
      done_mask |= this_mask;
   }
}
925
926 void
927 glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
928 st_dst_reg dst, st_src_reg src0)
929 {
930 st_src_reg undef = undef_src;
931
932 undef.swizzle = SWIZZLE_XXXX;
933
934 emit_scalar(ir, op, dst, src0, undef);
935 }
936
937 void
938 glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
939 st_dst_reg dst, st_src_reg src0)
940 {
941 int op = TGSI_OPCODE_ARL;
942
943 if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT)
944 op = TGSI_OPCODE_UARL;
945
946 assert(dst.file == PROGRAM_ADDRESS);
947 if (dst.index >= this->num_address_regs)
948 this->num_address_regs = dst.index + 1;
949
950 emit_asm(NULL, op, dst, src0);
951 }
952
/**
 * Add a constant to either the parameter list (PROGRAM_CONSTANT) or the
 * immediate list (PROGRAM_IMMEDIATE), deduplicating immediates.
 *
 * \param values     the constant data; doubles occupy two 32-bit slots each
 * \param size       component count before double expansion
 * \param datatype   GL_FLOAT, GL_INT, GL_UNSIGNED_INT, GL_DOUBLE, ...
 * \param swizzle_out receives the swizzle for PROGRAM_CONSTANT entries
 * \return the index of the (first) matching or newly added entry
 */
int
glsl_to_tgsi_visitor::add_constant(gl_register_file file,
                                   gl_constant_value values[8], int size, int datatype,
                                   GLuint *swizzle_out)
{
   if (file == PROGRAM_CONSTANT) {
      return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values,
                                              size, datatype, swizzle_out);
   }

   assert(file == PROGRAM_IMMEDIATE);

   int index = 0;
   immediate_storage *entry;
   /* Doubles take two 32-bit components per element. */
   int size32 = size * (datatype == GL_DOUBLE ? 2 : 1);
   int i;

   /* Search immediate storage to see if we already have an identical
    * immediate that we can use instead of adding a duplicate entry.
    */
   foreach_in_list(immediate_storage, entry, &this->immediates) {
      immediate_storage *tmp = entry;

      /* Values wider than a vec4 span consecutive list entries; walk
       * forward comparing one 4-component slot at a time.
       */
      for (i = 0; i * 4 < size32; i++) {
         int slot_size = MIN2(size32 - (i * 4), 4);
         if (tmp->type != datatype || tmp->size32 != slot_size)
            break;
         if (memcmp(tmp->values, &values[i * 4],
                    slot_size * sizeof(gl_constant_value)))
            break;

         /* Everything matches, keep going until the full size is matched */
         tmp = (immediate_storage *)tmp->next;
      }

      /* The full value matched */
      if (i * 4 >= size32)
         return index;

      index++;
   }

   /* No match: append one entry per vec4-sized slot. */
   for (i = 0; i * 4 < size32; i++) {
      int slot_size = MIN2(size32 - (i * 4), 4);
      /* Add this immediate to the list. */
      entry = new(mem_ctx) immediate_storage(&values[i * 4], slot_size, datatype);
      this->immediates.push_tail(entry);
      this->num_immediates++;
   }
   return index;
}
1004
1005 st_src_reg
1006 glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
1007 {
1008 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT);
1009 union gl_constant_value uval;
1010
1011 uval.f = val;
1012 src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle);
1013
1014 return src;
1015 }
1016
1017 st_src_reg
1018 glsl_to_tgsi_visitor::st_src_reg_for_double(double val)
1019 {
1020 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_DOUBLE);
1021 union gl_constant_value uval[2];
1022
1023 uval[0].u = *(uint32_t *)&val;
1024 uval[1].u = *(((uint32_t *)&val) + 1);
1025 src.index = add_constant(src.file, uval, 1, GL_DOUBLE, &src.swizzle);
1026
1027 return src;
1028 }
1029
1030 st_src_reg
1031 glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
1032 {
1033 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
1034 union gl_constant_value uval;
1035
1036 assert(native_integers);
1037
1038 uval.i = val;
1039 src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle);
1040
1041 return src;
1042 }
1043
1044 st_src_reg
1045 glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
1046 {
1047 if (native_integers)
1048 return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) :
1049 st_src_reg_for_int(val);
1050 else
1051 return st_src_reg_for_float(val);
1052 }
1053
1054 static int
1055 type_size(const struct glsl_type *type)
1056 {
1057 unsigned int i;
1058 int size;
1059
1060 switch (type->base_type) {
1061 case GLSL_TYPE_UINT:
1062 case GLSL_TYPE_INT:
1063 case GLSL_TYPE_FLOAT:
1064 case GLSL_TYPE_BOOL:
1065 if (type->is_matrix()) {
1066 return type->matrix_columns;
1067 } else {
1068 /* Regardless of size of vector, it gets a vec4. This is bad
1069 * packing for things like floats, but otherwise arrays become a
1070 * mess. Hopefully a later pass over the code can pack scalars
1071 * down if appropriate.
1072 */
1073 return 1;
1074 }
1075 break;
1076 case GLSL_TYPE_DOUBLE:
1077 if (type->is_matrix()) {
1078 if (type->vector_elements <= 2)
1079 return type->matrix_columns;
1080 else
1081 return type->matrix_columns * 2;
1082 } else {
1083 /* For doubles if we have a double or dvec2 they fit in one
1084 * vec4, else they need 2 vec4s.
1085 */
1086 if (type->vector_elements <= 2)
1087 return 1;
1088 else
1089 return 2;
1090 }
1091 break;
1092 case GLSL_TYPE_ARRAY:
1093 assert(type->length > 0);
1094 return type_size(type->fields.array) * type->length;
1095 case GLSL_TYPE_STRUCT:
1096 size = 0;
1097 for (i = 0; i < type->length; i++) {
1098 size += type_size(type->fields.structure[i].type);
1099 }
1100 return size;
1101 case GLSL_TYPE_SAMPLER:
1102 case GLSL_TYPE_IMAGE:
1103 case GLSL_TYPE_SUBROUTINE:
1104 /* Samplers take up one slot in UNIFORMS[], but they're baked in
1105 * at link time.
1106 */
1107 return 1;
1108 case GLSL_TYPE_ATOMIC_UINT:
1109 case GLSL_TYPE_INTERFACE:
1110 case GLSL_TYPE_VOID:
1111 case GLSL_TYPE_ERROR:
1112 assert(!"Invalid type in type_size");
1113 break;
1114 }
1115 return 0;
1116 }
1117
1118
1119 /**
1120 * If the given GLSL type is an array or matrix or a structure containing
1121 * an array/matrix member, return true. Else return false.
1122 *
1123 * This is used to determine which kind of temp storage (PROGRAM_TEMPORARY
1124 * or PROGRAM_ARRAY) should be used for variables of this type. Anytime
1125 * we have an array that might be indexed with a variable, we need to use
1126 * the later storage type.
1127 */
1128 static bool
1129 type_has_array_or_matrix(const glsl_type *type)
1130 {
1131 if (type->is_array() || type->is_matrix())
1132 return true;
1133
1134 if (type->is_record()) {
1135 for (unsigned i = 0; i < type->length; i++) {
1136 if (type_has_array_or_matrix(type->fields.structure[i].type)) {
1137 return true;
1138 }
1139 }
1140 }
1141
1142 return false;
1143 }
1144
1145
1146 /**
1147 * In the initial pass of codegen, we assign temporary numbers to
1148 * intermediate results. (not SSA -- variable assignments will reuse
1149 * storage).
1150 */
1151 st_src_reg
1152 glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
1153 {
1154 st_src_reg src;
1155
1156 src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
1157 src.reladdr = NULL;
1158 src.negate = 0;
1159
1160 if (!options->EmitNoIndirectTemp && type_has_array_or_matrix(type)) {
1161 if (next_array >= max_num_arrays) {
1162 max_num_arrays += 32;
1163 array_sizes = (unsigned*)
1164 realloc(array_sizes, sizeof(array_sizes[0]) * max_num_arrays);
1165 }
1166
1167 src.file = PROGRAM_ARRAY;
1168 src.index = next_array << 16 | 0x8000;
1169 array_sizes[next_array] = type_size(type);
1170 ++next_array;
1171
1172 } else {
1173 src.file = PROGRAM_TEMPORARY;
1174 src.index = next_temp;
1175 next_temp += type_size(type);
1176 }
1177
1178 if (type->is_array() || type->is_record()) {
1179 src.swizzle = SWIZZLE_NOOP;
1180 } else {
1181 src.swizzle = swizzle_for_size(type->vector_elements);
1182 }
1183
1184 return src;
1185 }
1186
1187 variable_storage *
1188 glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
1189 {
1190
1191 foreach_in_list(variable_storage, entry, &this->variables) {
1192 if (entry->var == var)
1193 return entry;
1194 }
1195
1196 return NULL;
1197 }
1198
/**
 * Handle a GLSL variable declaration.
 *
 * Two kinds of variables need work here:
 *  - gl_FragCoord: its layout qualifiers are copied onto the fragment
 *    program so the state tracker can apply the right transform.
 *  - built-in "gl_*" uniforms: these are backed by the STATE file.  If
 *    every state slot is a plain .xyzw reference we can use STATE storage
 *    directly; otherwise the slots are copied into a temporary.
 */
void
glsl_to_tgsi_visitor::visit(ir_variable *ir)
{
   if (strcmp(ir->name, "gl_FragCoord") == 0) {
      struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;

      fp->OriginUpperLeft = ir->data.origin_upper_left;
      fp->PixelCenterInteger = ir->data.pixel_center_integer;
   }

   if (ir->data.mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
      unsigned int i;
      const ir_state_slot *const slots = ir->get_state_slots();
      assert(slots != NULL);

      /* Check if this statevar's setup in the STATE file exactly
       * matches how we'll want to reference it as a
       * struct/array/whatever. If not, then we need to move it into
       * temporary storage and hope that it'll get copy-propagated
       * out.
       */
      for (i = 0; i < ir->get_num_state_slots(); i++) {
         if (slots[i].swizzle != SWIZZLE_XYZW) {
            break;
         }
      }

      variable_storage *storage;
      st_dst_reg dst;
      /* i ran to the end iff every slot was a plain .xyzw reference. */
      if (i == ir->get_num_state_slots()) {
         /* We'll set the index later. */
         storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
         this->variables.push_tail(storage);

         dst = undef_dst;
      } else {
         /* The variable_storage constructor allocates slots based on the size
          * of the type. However, this had better match the number of state
          * elements that we're going to copy into the new temporary.
          */
         assert((int) ir->get_num_state_slots() == type_size(ir->type));

         dst = st_dst_reg(get_temp(ir->type));

         storage = new(mem_ctx) variable_storage(ir, dst.file, dst.index);

         this->variables.push_tail(storage);
      }


      /* Note: this inner 'i' intentionally shadows the probe index above. */
      for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) {
         int index = _mesa_add_state_reference(this->prog->Parameters,
                                               (gl_state_index *)slots[i].tokens);

         if (storage->file == PROGRAM_STATE_VAR) {
            if (storage->index == -1) {
               storage->index = index;
            } else {
               /* STATE references for one variable must be contiguous. */
               assert(index == storage->index + (int)i);
            }
         } else {
            /* We use GLSL_TYPE_FLOAT here regardless of the actual type of
             * the data being moved since MOV does not care about the type of
             * data it is moving, and we don't want to declare registers with
             * array or struct types.
             */
            st_src_reg src(PROGRAM_STATE_VAR, index, GLSL_TYPE_FLOAT);
            src.swizzle = slots[i].swizzle;
            emit_asm(ir, TGSI_OPCODE_MOV, dst, src);
            /* even a float takes up a whole vec4 reg in a struct/array. */
            dst.index++;
         }
      }

      /* Sanity check: the copy loop must have advanced dst.index by
       * exactly one register per state slot.
       */
      if (storage->file == PROGRAM_TEMPORARY &&
          dst.index != storage->index + (int) ir->get_num_state_slots()) {
         fail_link(this->shader_program,
                   "failed to load builtin uniform `%s' (%d/%d regs loaded)\n",
                   ir->name, dst.index - storage->index,
                   type_size(ir->type));
      }
   }
}
1282
/**
 * Translate a GLSL loop: the body is wrapped in BGNLOOP/ENDLOOP.
 * Breaks and continues inside the body are emitted by
 * visit(ir_loop_jump *).
 */
void
glsl_to_tgsi_visitor::visit(ir_loop *ir)
{
   emit_asm(NULL, TGSI_OPCODE_BGNLOOP);

   visit_exec_list(&ir->body_instructions, this);

   emit_asm(NULL, TGSI_OPCODE_ENDLOOP);
}
1292
1293 void
1294 glsl_to_tgsi_visitor::visit(ir_loop_jump *ir)
1295 {
1296 switch (ir->mode) {
1297 case ir_loop_jump::jump_break:
1298 emit_asm(NULL, TGSI_OPCODE_BRK);
1299 break;
1300 case ir_loop_jump::jump_continue:
1301 emit_asm(NULL, TGSI_OPCODE_CONT);
1302 break;
1303 }
1304 }
1305
1306
1307 void
1308 glsl_to_tgsi_visitor::visit(ir_function_signature *ir)
1309 {
1310 assert(0);
1311 (void)ir;
1312 }
1313
1314 void
1315 glsl_to_tgsi_visitor::visit(ir_function *ir)
1316 {
1317 /* Ignore function bodies other than main() -- we shouldn't see calls to
1318 * them since they should all be inlined before we get to glsl_to_tgsi.
1319 */
1320 if (strcmp(ir->name, "main") == 0) {
1321 const ir_function_signature *sig;
1322 exec_list empty;
1323
1324 sig = ir->matching_signature(NULL, &empty, false);
1325
1326 assert(sig);
1327
1328 foreach_in_list(ir_instruction, ir, &sig->body) {
1329 ir->accept(this);
1330 }
1331 }
1332 }
1333
1334 bool
1335 glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
1336 {
1337 int nonmul_operand = 1 - mul_operand;
1338 st_src_reg a, b, c;
1339 st_dst_reg result_dst;
1340
1341 ir_expression *expr = ir->operands[mul_operand]->as_expression();
1342 if (!expr || expr->operation != ir_binop_mul)
1343 return false;
1344
1345 expr->operands[0]->accept(this);
1346 a = this->result;
1347 expr->operands[1]->accept(this);
1348 b = this->result;
1349 ir->operands[nonmul_operand]->accept(this);
1350 c = this->result;
1351
1352 this->result = get_temp(ir->type);
1353 result_dst = st_dst_reg(this->result);
1354 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1355 emit_asm(ir, TGSI_OPCODE_MAD, result_dst, a, b, c);
1356
1357 return true;
1358 }
1359
1360 /**
1361 * Emit MAD(a, -b, a) instead of AND(a, NOT(b))
1362 *
1363 * The logic values are 1.0 for true and 0.0 for false. Logical-and is
1364 * implemented using multiplication, and logical-or is implemented using
1365 * addition. Logical-not can be implemented as (true - x), or (1.0 - x).
1366 * As result, the logical expression (a & !b) can be rewritten as:
1367 *
1368 * - a * !b
1369 * - a * (1 - b)
1370 * - (a * 1) - (a * b)
1371 * - a + -(a * b)
1372 * - a + (a * -b)
1373 *
1374 * This final expression can be implemented as a single MAD(a, -b, a)
1375 * instruction.
1376 */
1377 bool
1378 glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
1379 {
1380 const int other_operand = 1 - try_operand;
1381 st_src_reg a, b;
1382
1383 ir_expression *expr = ir->operands[try_operand]->as_expression();
1384 if (!expr || expr->operation != ir_unop_logic_not)
1385 return false;
1386
1387 ir->operands[other_operand]->accept(this);
1388 a = this->result;
1389 expr->operands[0]->accept(this);
1390 b = this->result;
1391
1392 b.negate = ~b.negate;
1393
1394 this->result = get_temp(ir->type);
1395 emit_asm(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a);
1396
1397 return true;
1398 }
1399
1400 void
1401 glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
1402 st_src_reg *reg, int *num_reladdr)
1403 {
1404 if (!reg->reladdr && !reg->reladdr2)
1405 return;
1406
1407 if (reg->reladdr) emit_arl(ir, address_reg, *reg->reladdr);
1408 if (reg->reladdr2) emit_arl(ir, address_reg2, *reg->reladdr2);
1409
1410 if (*num_reladdr != 1) {
1411 st_src_reg temp = get_temp(glsl_type::vec4_type);
1412
1413 emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg);
1414 *reg = temp;
1415 }
1416
1417 (*num_reladdr)--;
1418 }
1419
1420 void
1421 glsl_to_tgsi_visitor::visit(ir_expression *ir)
1422 {
1423 unsigned int operand;
1424 st_src_reg op[ARRAY_SIZE(ir->operands)];
1425 st_src_reg result_src;
1426 st_dst_reg result_dst;
1427
1428 /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c)
1429 */
1430 if (ir->operation == ir_binop_add) {
1431 if (try_emit_mad(ir, 1))
1432 return;
1433 if (try_emit_mad(ir, 0))
1434 return;
1435 }
1436
   /* Quick peephole: Emit MAD(a, -b, a) instead of AND(a, NOT(b))
1438 */
1439 if (!native_integers && ir->operation == ir_binop_logic_and) {
1440 if (try_emit_mad_for_and_not(ir, 1))
1441 return;
1442 if (try_emit_mad_for_and_not(ir, 0))
1443 return;
1444 }
1445
1446 if (ir->operation == ir_quadop_vector)
1447 assert(!"ir_quadop_vector should have been lowered");
1448
1449 for (operand = 0; operand < ir->get_num_operands(); operand++) {
1450 this->result.file = PROGRAM_UNDEFINED;
1451 ir->operands[operand]->accept(this);
1452 if (this->result.file == PROGRAM_UNDEFINED) {
1453 printf("Failed to get tree for expression operand:\n");
1454 ir->operands[operand]->print();
1455 printf("\n");
1456 exit(1);
1457 }
1458 op[operand] = this->result;
1459
1460 /* Matrix expression operands should have been broken down to vector
1461 * operations already.
1462 */
1463 assert(!ir->operands[operand]->type->is_matrix());
1464 }
1465
1466 int vector_elements = ir->operands[0]->type->vector_elements;
1467 if (ir->operands[1]) {
1468 vector_elements = MAX2(vector_elements,
1469 ir->operands[1]->type->vector_elements);
1470 }
1471
1472 this->result.file = PROGRAM_UNDEFINED;
1473
1474 /* Storage for our result. Ideally for an assignment we'd be using
1475 * the actual storage for the result here, instead.
1476 */
1477 result_src = get_temp(ir->type);
1478 /* convenience for the emit functions below. */
1479 result_dst = st_dst_reg(result_src);
1480 /* Limit writes to the channels that will be used by result_src later.
1481 * This does limit this temp's use as a temporary for multi-instruction
1482 * sequences.
1483 */
1484 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1485
1486 switch (ir->operation) {
1487 case ir_unop_logic_not:
1488 if (result_dst.type != GLSL_TYPE_FLOAT)
1489 emit_asm(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
1490 else {
1491 /* Previously 'SEQ dst, src, 0.0' was used for this. However, many
1492 * older GPUs implement SEQ using multiple instructions (i915 uses two
1493 * SGE instructions and a MUL instruction). Since our logic values are
1494 * 0.0 and 1.0, 1-x also implements !x.
1495 */
1496 op[0].negate = ~op[0].negate;
1497 emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0));
1498 }
1499 break;
1500 case ir_unop_neg:
1501 if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT)
1502 emit_asm(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
1503 else if (result_dst.type == GLSL_TYPE_DOUBLE)
1504 emit_asm(ir, TGSI_OPCODE_DNEG, result_dst, op[0]);
1505 else {
1506 op[0].negate = ~op[0].negate;
1507 result_src = op[0];
1508 }
1509 break;
1510 case ir_unop_subroutine_to_int:
1511 emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
1512 break;
1513 case ir_unop_abs:
1514 emit_asm(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
1515 break;
1516 case ir_unop_sign:
1517 emit_asm(ir, TGSI_OPCODE_SSG, result_dst, op[0]);
1518 break;
1519 case ir_unop_rcp:
1520 emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]);
1521 break;
1522
1523 case ir_unop_exp2:
1524 emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]);
1525 break;
1526 case ir_unop_exp:
1527 case ir_unop_log:
1528 assert(!"not reached: should be handled by ir_explog_to_explog2");
1529 break;
1530 case ir_unop_log2:
1531 emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]);
1532 break;
1533 case ir_unop_sin:
1534 emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
1535 break;
1536 case ir_unop_cos:
1537 emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]);
1538 break;
1539 case ir_unop_saturate: {
1540 glsl_to_tgsi_instruction *inst;
1541 inst = emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
1542 inst->saturate = true;
1543 break;
1544 }
1545
1546 case ir_unop_dFdx:
1547 case ir_unop_dFdx_coarse:
1548 emit_asm(ir, TGSI_OPCODE_DDX, result_dst, op[0]);
1549 break;
1550 case ir_unop_dFdx_fine:
1551 emit_asm(ir, TGSI_OPCODE_DDX_FINE, result_dst, op[0]);
1552 break;
1553 case ir_unop_dFdy:
1554 case ir_unop_dFdy_coarse:
1555 case ir_unop_dFdy_fine:
1556 {
1557 /* The X component contains 1 or -1 depending on whether the framebuffer
1558 * is a FBO or the window system buffer, respectively.
1559 * It is then multiplied with the source operand of DDY.
1560 */
1561 static const gl_state_index transform_y_state[STATE_LENGTH]
1562 = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM };
1563
1564 unsigned transform_y_index =
1565 _mesa_add_state_reference(this->prog->Parameters,
1566 transform_y_state);
1567
1568 st_src_reg transform_y = st_src_reg(PROGRAM_STATE_VAR,
1569 transform_y_index,
1570 glsl_type::vec4_type);
1571 transform_y.swizzle = SWIZZLE_XXXX;
1572
1573 st_src_reg temp = get_temp(glsl_type::vec4_type);
1574
1575 emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(temp), transform_y, op[0]);
1576 emit_asm(ir, ir->operation == ir_unop_dFdy_fine ?
1577 TGSI_OPCODE_DDY_FINE : TGSI_OPCODE_DDY, result_dst, temp);
1578 break;
1579 }
1580
1581 case ir_unop_frexp_sig:
1582 emit_asm(ir, TGSI_OPCODE_DFRACEXP, result_dst, undef_dst, op[0]);
1583 break;
1584
1585 case ir_unop_frexp_exp:
1586 emit_asm(ir, TGSI_OPCODE_DFRACEXP, undef_dst, result_dst, op[0]);
1587 break;
1588
1589 case ir_unop_noise: {
1590 /* At some point, a motivated person could add a better
1591 * implementation of noise. Currently not even the nvidia
1592 * binary drivers do anything more than this. In any case, the
1593 * place to do this is in the GL state tracker, not the poor
1594 * driver.
1595 */
1596 emit_asm(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5));
1597 break;
1598 }
1599
1600 case ir_binop_add:
1601 emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
1602 break;
1603 case ir_binop_sub:
1604 emit_asm(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]);
1605 break;
1606
1607 case ir_binop_mul:
1608 emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
1609 break;
1610 case ir_binop_div:
1611 if (result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_DOUBLE)
1612 assert(!"not reached: should be handled by ir_div_to_mul_rcp");
1613 else
1614 emit_asm(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
1615 break;
1616 case ir_binop_mod:
1617 if (result_dst.type == GLSL_TYPE_FLOAT)
1618 assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
1619 else
1620 emit_asm(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]);
1621 break;
1622
1623 case ir_binop_less:
1624 emit_asm(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
1625 break;
1626 case ir_binop_greater:
1627 emit_asm(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]);
1628 break;
1629 case ir_binop_lequal:
1630 emit_asm(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]);
1631 break;
1632 case ir_binop_gequal:
1633 emit_asm(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
1634 break;
1635 case ir_binop_equal:
1636 emit_asm(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
1637 break;
1638 case ir_binop_nequal:
1639 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1640 break;
1641 case ir_binop_all_equal:
1642 /* "==" operator producing a scalar boolean. */
1643 if (ir->operands[0]->type->is_vector() ||
1644 ir->operands[1]->type->is_vector()) {
1645 st_src_reg temp = get_temp(native_integers ?
1646 glsl_type::uvec4_type :
1647 glsl_type::vec4_type);
1648
1649 if (native_integers) {
1650 st_dst_reg temp_dst = st_dst_reg(temp);
1651 st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
1652
1653 emit_asm(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]);
1654
1655 /* Emit 1-3 AND operations to combine the SEQ results. */
1656 switch (ir->operands[0]->type->vector_elements) {
1657 case 2:
1658 break;
1659 case 3:
1660 temp_dst.writemask = WRITEMASK_Y;
1661 temp1.swizzle = SWIZZLE_YYYY;
1662 temp2.swizzle = SWIZZLE_ZZZZ;
1663 emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
1664 break;
1665 case 4:
1666 temp_dst.writemask = WRITEMASK_X;
1667 temp1.swizzle = SWIZZLE_XXXX;
1668 temp2.swizzle = SWIZZLE_YYYY;
1669 emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
1670 temp_dst.writemask = WRITEMASK_Y;
1671 temp1.swizzle = SWIZZLE_ZZZZ;
1672 temp2.swizzle = SWIZZLE_WWWW;
1673 emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
1674 }
1675
1676 temp1.swizzle = SWIZZLE_XXXX;
1677 temp2.swizzle = SWIZZLE_YYYY;
1678 emit_asm(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2);
1679 } else {
1680 emit_asm(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
1681
1682 /* After the dot-product, the value will be an integer on the
1683 * range [0,4]. Zero becomes 1.0, and positive values become zero.
1684 */
1685 emit_dp(ir, result_dst, temp, temp, vector_elements);
1686
1687 /* Negating the result of the dot-product gives values on the range
1688 * [-4, 0]. Zero becomes 1.0, and negative values become zero.
1689 * This is achieved using SGE.
1690 */
1691 st_src_reg sge_src = result_src;
1692 sge_src.negate = ~sge_src.negate;
1693 emit_asm(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0));
1694 }
1695 } else {
1696 emit_asm(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
1697 }
1698 break;
1699 case ir_binop_any_nequal:
1700 /* "!=" operator producing a scalar boolean. */
1701 if (ir->operands[0]->type->is_vector() ||
1702 ir->operands[1]->type->is_vector()) {
1703 st_src_reg temp = get_temp(native_integers ?
1704 glsl_type::uvec4_type :
1705 glsl_type::vec4_type);
1706 emit_asm(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
1707
1708 if (native_integers) {
1709 st_dst_reg temp_dst = st_dst_reg(temp);
1710 st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
1711
1712 /* Emit 1-3 OR operations to combine the SNE results. */
1713 switch (ir->operands[0]->type->vector_elements) {
1714 case 2:
1715 break;
1716 case 3:
1717 temp_dst.writemask = WRITEMASK_Y;
1718 temp1.swizzle = SWIZZLE_YYYY;
1719 temp2.swizzle = SWIZZLE_ZZZZ;
1720 emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
1721 break;
1722 case 4:
1723 temp_dst.writemask = WRITEMASK_X;
1724 temp1.swizzle = SWIZZLE_XXXX;
1725 temp2.swizzle = SWIZZLE_YYYY;
1726 emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
1727 temp_dst.writemask = WRITEMASK_Y;
1728 temp1.swizzle = SWIZZLE_ZZZZ;
1729 temp2.swizzle = SWIZZLE_WWWW;
1730 emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
1731 }
1732
1733 temp1.swizzle = SWIZZLE_XXXX;
1734 temp2.swizzle = SWIZZLE_YYYY;
1735 emit_asm(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2);
1736 } else {
1737 /* After the dot-product, the value will be an integer on the
1738 * range [0,4]. Zero stays zero, and positive values become 1.0.
1739 */
1740 glsl_to_tgsi_instruction *const dp =
1741 emit_dp(ir, result_dst, temp, temp, vector_elements);
1742 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
1743 /* The clamping to [0,1] can be done for free in the fragment
1744 * shader with a saturate.
1745 */
1746 dp->saturate = true;
1747 } else {
1748 /* Negating the result of the dot-product gives values on the range
1749 * [-4, 0]. Zero stays zero, and negative values become 1.0. This
1750 * achieved using SLT.
1751 */
1752 st_src_reg slt_src = result_src;
1753 slt_src.negate = ~slt_src.negate;
1754 emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
1755 }
1756 }
1757 } else {
1758 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1759 }
1760 break;
1761
1762 case ir_unop_any: {
1763 assert(ir->operands[0]->type->is_vector());
1764
1765 if (native_integers) {
1766 int dst_swizzle = 0, op0_swizzle, i;
1767 st_src_reg accum = op[0];
1768
1769 op0_swizzle = op[0].swizzle;
1770 accum.swizzle = MAKE_SWIZZLE4(GET_SWZ(op0_swizzle, 0),
1771 GET_SWZ(op0_swizzle, 0),
1772 GET_SWZ(op0_swizzle, 0),
1773 GET_SWZ(op0_swizzle, 0));
1774 for (i = 0; i < 4; i++) {
1775 if (result_dst.writemask & (1 << i)) {
1776 dst_swizzle = MAKE_SWIZZLE4(i, i, i, i);
1777 break;
1778 }
1779 }
1780 assert(i != 4);
1781 assert(ir->operands[0]->type->is_boolean());
1782
1783 /* OR all the components together, since they should be either 0 or ~0
1784 */
1785 switch (ir->operands[0]->type->vector_elements) {
1786 case 4:
1787 op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(op0_swizzle, 3),
1788 GET_SWZ(op0_swizzle, 3),
1789 GET_SWZ(op0_swizzle, 3),
1790 GET_SWZ(op0_swizzle, 3));
1791 emit_asm(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]);
1792 accum = st_src_reg(result_dst);
1793 accum.swizzle = dst_swizzle;
1794 /* fallthrough */
1795 case 3:
1796 op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(op0_swizzle, 2),
1797 GET_SWZ(op0_swizzle, 2),
1798 GET_SWZ(op0_swizzle, 2),
1799 GET_SWZ(op0_swizzle, 2));
1800 emit_asm(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]);
1801 accum = st_src_reg(result_dst);
1802 accum.swizzle = dst_swizzle;
1803 /* fallthrough */
1804 case 2:
1805 op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(op0_swizzle, 1),
1806 GET_SWZ(op0_swizzle, 1),
1807 GET_SWZ(op0_swizzle, 1),
1808 GET_SWZ(op0_swizzle, 1));
1809 emit_asm(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]);
1810 break;
1811 default:
1812 assert(!"Unexpected vector size");
1813 break;
1814 }
1815 } else {
1816 /* After the dot-product, the value will be an integer on the
1817 * range [0,4]. Zero stays zero, and positive values become 1.0.
1818 */
1819 glsl_to_tgsi_instruction *const dp =
1820 emit_dp(ir, result_dst, op[0], op[0],
1821 ir->operands[0]->type->vector_elements);
1822 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
1823 result_dst.type == GLSL_TYPE_FLOAT) {
1824 /* The clamping to [0,1] can be done for free in the fragment
1825 * shader with a saturate.
1826 */
1827 dp->saturate = true;
1828 } else if (result_dst.type == GLSL_TYPE_FLOAT) {
1829 /* Negating the result of the dot-product gives values on the range
1830 * [-4, 0]. Zero stays zero, and negative values become 1.0. This
1831 * is achieved using SLT.
1832 */
1833 st_src_reg slt_src = result_src;
1834 slt_src.negate = ~slt_src.negate;
1835 emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
1836 }
1837 else {
1838 /* Use SNE 0 if integers are being used as boolean values. */
1839 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
1840 }
1841 }
1842 break;
1843 }
1844
1845 case ir_binop_logic_xor:
1846 if (native_integers)
1847 emit_asm(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
1848 else
1849 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1850 break;
1851
1852 case ir_binop_logic_or: {
1853 if (native_integers) {
1854 /* If integers are used as booleans, we can use an actual "or"
1855 * instruction.
1856 */
1857 assert(native_integers);
1858 emit_asm(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
1859 } else {
1860 /* After the addition, the value will be an integer on the
1861 * range [0,2]. Zero stays zero, and positive values become 1.0.
1862 */
1863 glsl_to_tgsi_instruction *add =
1864 emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
1865 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
1866 /* The clamping to [0,1] can be done for free in the fragment
1867 * shader with a saturate if floats are being used as boolean values.
1868 */
1869 add->saturate = true;
1870 } else {
1871 /* Negating the result of the addition gives values on the range
1872 * [-2, 0]. Zero stays zero, and negative values become 1.0. This
1873 * is achieved using SLT.
1874 */
1875 st_src_reg slt_src = result_src;
1876 slt_src.negate = ~slt_src.negate;
1877 emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
1878 }
1879 }
1880 break;
1881 }
1882
1883 case ir_binop_logic_and:
1884 /* If native integers are disabled, the bool args are stored as float 0.0
1885 * or 1.0, so "mul" gives us "and". If they're enabled, just use the
1886 * actual AND opcode.
1887 */
1888 if (native_integers)
1889 emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
1890 else
1891 emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
1892 break;
1893
1894 case ir_binop_dot:
1895 assert(ir->operands[0]->type->is_vector());
1896 assert(ir->operands[0]->type == ir->operands[1]->type);
1897 emit_dp(ir, result_dst, op[0], op[1],
1898 ir->operands[0]->type->vector_elements);
1899 break;
1900
1901 case ir_unop_sqrt:
1902 if (have_sqrt) {
1903 emit_scalar(ir, TGSI_OPCODE_SQRT, result_dst, op[0]);
1904 } else {
1905 /* sqrt(x) = x * rsq(x). */
1906 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
1907 emit_asm(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]);
1908 /* For incoming channels <= 0, set the result to 0. */
1909 op[0].negate = ~op[0].negate;
1910 emit_asm(ir, TGSI_OPCODE_CMP, result_dst,
1911 op[0], result_src, st_src_reg_for_float(0.0));
1912 }
1913 break;
1914 case ir_unop_rsq:
1915 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
1916 break;
1917 case ir_unop_i2f:
1918 if (native_integers) {
1919 emit_asm(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
1920 break;
1921 }
1922 /* fallthrough to next case otherwise */
1923 case ir_unop_b2f:
1924 if (native_integers) {
1925 emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0));
1926 break;
1927 }
1928 /* fallthrough to next case otherwise */
1929 case ir_unop_i2u:
1930 case ir_unop_u2i:
1931 /* Converting between signed and unsigned integers is a no-op. */
1932 result_src = op[0];
1933 break;
1934 case ir_unop_b2i:
1935 if (native_integers) {
1936 /* Booleans are stored as integers using ~0 for true and 0 for false.
1937 * GLSL requires that int(bool) return 1 for true and 0 for false.
1938 * This conversion is done with AND, but it could be done with NEG.
1939 */
1940 emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1));
1941 } else {
1942 /* Booleans and integers are both stored as floats when native
1943 * integers are disabled.
1944 */
1945 result_src = op[0];
1946 }
1947 break;
1948 case ir_unop_f2i:
1949 if (native_integers)
1950 emit_asm(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
1951 else
1952 emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
1953 break;
1954 case ir_unop_f2u:
1955 if (native_integers)
1956 emit_asm(ir, TGSI_OPCODE_F2U, result_dst, op[0]);
1957 else
1958 emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
1959 break;
1960 case ir_unop_bitcast_f2i:
1961 result_src = op[0];
1962 result_src.type = GLSL_TYPE_INT;
1963 break;
1964 case ir_unop_bitcast_f2u:
1965 result_src = op[0];
1966 result_src.type = GLSL_TYPE_UINT;
1967 break;
1968 case ir_unop_bitcast_i2f:
1969 case ir_unop_bitcast_u2f:
1970 result_src = op[0];
1971 result_src.type = GLSL_TYPE_FLOAT;
1972 break;
1973 case ir_unop_f2b:
1974 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
1975 break;
1976 case ir_unop_d2b:
1977 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_double(0.0));
1978 break;
1979 case ir_unop_i2b:
1980 if (native_integers)
1981 emit_asm(ir, TGSI_OPCODE_USNE, result_dst, op[0], st_src_reg_for_int(0));
1982 else
1983 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
1984 break;
1985 case ir_unop_trunc:
1986 emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
1987 break;
1988 case ir_unop_ceil:
1989 emit_asm(ir, TGSI_OPCODE_CEIL, result_dst, op[0]);
1990 break;
1991 case ir_unop_floor:
1992 emit_asm(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
1993 break;
1994 case ir_unop_round_even:
1995 emit_asm(ir, TGSI_OPCODE_ROUND, result_dst, op[0]);
1996 break;
1997 case ir_unop_fract:
1998 emit_asm(ir, TGSI_OPCODE_FRC, result_dst, op[0]);
1999 break;
2000
2001 case ir_binop_min:
2002 emit_asm(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]);
2003 break;
2004 case ir_binop_max:
2005 emit_asm(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]);
2006 break;
2007 case ir_binop_pow:
2008 emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]);
2009 break;
2010
2011 case ir_unop_bit_not:
2012 if (native_integers) {
2013 emit_asm(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
2014 break;
2015 }
2016 case ir_unop_u2f:
2017 if (native_integers) {
2018 emit_asm(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
2019 break;
2020 }
2021 case ir_binop_lshift:
2022 if (native_integers) {
2023 emit_asm(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]);
2024 break;
2025 }
2026 case ir_binop_rshift:
2027 if (native_integers) {
2028 emit_asm(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]);
2029 break;
2030 }
2031 case ir_binop_bit_and:
2032 if (native_integers) {
2033 emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
2034 break;
2035 }
2036 case ir_binop_bit_xor:
2037 if (native_integers) {
2038 emit_asm(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
2039 break;
2040 }
2041 case ir_binop_bit_or:
2042 if (native_integers) {
2043 emit_asm(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
2044 break;
2045 }
2046
2047 assert(!"GLSL 1.30 features unsupported");
2048 break;
2049
2050 case ir_binop_ubo_load: {
2051 ir_constant *const_uniform_block = ir->operands[0]->as_constant();
2052 ir_constant *const_offset_ir = ir->operands[1]->as_constant();
2053 unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
2054 unsigned const_block = const_uniform_block ? const_uniform_block->value.u[0] + 1 : 0;
2055 st_src_reg index_reg = get_temp(glsl_type::uint_type);
2056 st_src_reg cbuf;
2057
2058 cbuf.type = ir->type->base_type;
2059 cbuf.file = PROGRAM_CONSTANT;
2060 cbuf.index = 0;
2061 cbuf.reladdr = NULL;
2062 cbuf.negate = 0;
2063
2064 assert(ir->type->is_vector() || ir->type->is_scalar());
2065
2066 if (const_offset_ir) {
2067 /* Constant index into constant buffer */
2068 cbuf.reladdr = NULL;
2069 cbuf.index = const_offset / 16;
2070 }
2071 else {
2072 /* Relative/variable index into constant buffer */
2073 emit_asm(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), op[1],
2074 st_src_reg_for_int(4));
2075 cbuf.reladdr = ralloc(mem_ctx, st_src_reg);
2076 memcpy(cbuf.reladdr, &index_reg, sizeof(index_reg));
2077 }
2078
2079 if (const_uniform_block) {
2080 /* Constant constant buffer */
2081 cbuf.reladdr2 = NULL;
2082 cbuf.index2D = const_block;
2083 cbuf.has_index2 = true;
2084 }
2085 else {
2086 /* Relative/variable constant buffer */
2087 cbuf.reladdr2 = ralloc(mem_ctx, st_src_reg);
2088 cbuf.index2D = 1;
2089 memcpy(cbuf.reladdr2, &op[0], sizeof(st_src_reg));
2090 cbuf.has_index2 = true;
2091 }
2092
2093 cbuf.swizzle = swizzle_for_size(ir->type->vector_elements);
2094 if (cbuf.type == GLSL_TYPE_DOUBLE)
2095 cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 8,
2096 const_offset % 16 / 8,
2097 const_offset % 16 / 8,
2098 const_offset % 16 / 8);
2099 else
2100 cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 4,
2101 const_offset % 16 / 4,
2102 const_offset % 16 / 4,
2103 const_offset % 16 / 4);
2104
2105 if (ir->type->base_type == GLSL_TYPE_BOOL) {
2106 emit_asm(ir, TGSI_OPCODE_USNE, result_dst, cbuf, st_src_reg_for_int(0));
2107 } else {
2108 emit_asm(ir, TGSI_OPCODE_MOV, result_dst, cbuf);
2109 }
2110 break;
2111 }
2112 case ir_triop_lrp:
2113 /* note: we have to reorder the three args here */
2114 emit_asm(ir, TGSI_OPCODE_LRP, result_dst, op[2], op[1], op[0]);
2115 break;
2116 case ir_triop_csel:
2117 if (this->ctx->Const.NativeIntegers)
2118 emit_asm(ir, TGSI_OPCODE_UCMP, result_dst, op[0], op[1], op[2]);
2119 else {
2120 op[0].negate = ~op[0].negate;
2121 emit_asm(ir, TGSI_OPCODE_CMP, result_dst, op[0], op[1], op[2]);
2122 }
2123 break;
2124 case ir_triop_bitfield_extract:
2125 emit_asm(ir, TGSI_OPCODE_IBFE, result_dst, op[0], op[1], op[2]);
2126 break;
2127 case ir_quadop_bitfield_insert:
2128 emit_asm(ir, TGSI_OPCODE_BFI, result_dst, op[0], op[1], op[2], op[3]);
2129 break;
2130 case ir_unop_bitfield_reverse:
2131 emit_asm(ir, TGSI_OPCODE_BREV, result_dst, op[0]);
2132 break;
2133 case ir_unop_bit_count:
2134 emit_asm(ir, TGSI_OPCODE_POPC, result_dst, op[0]);
2135 break;
2136 case ir_unop_find_msb:
2137 emit_asm(ir, TGSI_OPCODE_IMSB, result_dst, op[0]);
2138 break;
2139 case ir_unop_find_lsb:
2140 emit_asm(ir, TGSI_OPCODE_LSB, result_dst, op[0]);
2141 break;
2142 case ir_binop_imul_high:
2143 emit_asm(ir, TGSI_OPCODE_IMUL_HI, result_dst, op[0], op[1]);
2144 break;
2145 case ir_triop_fma:
2146 /* In theory, MAD is incorrect here. */
2147 if (have_fma)
2148 emit_asm(ir, TGSI_OPCODE_FMA, result_dst, op[0], op[1], op[2]);
2149 else
2150 emit_asm(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]);
2151 break;
2152 case ir_unop_interpolate_at_centroid:
2153 emit_asm(ir, TGSI_OPCODE_INTERP_CENTROID, result_dst, op[0]);
2154 break;
2155 case ir_binop_interpolate_at_offset:
2156 emit_asm(ir, TGSI_OPCODE_INTERP_OFFSET, result_dst, op[0], op[1]);
2157 break;
2158 case ir_binop_interpolate_at_sample:
2159 emit_asm(ir, TGSI_OPCODE_INTERP_SAMPLE, result_dst, op[0], op[1]);
2160 break;
2161
2162 case ir_unop_d2f:
2163 emit_asm(ir, TGSI_OPCODE_D2F, result_dst, op[0]);
2164 break;
2165 case ir_unop_f2d:
2166 emit_asm(ir, TGSI_OPCODE_F2D, result_dst, op[0]);
2167 break;
2168 case ir_unop_d2i:
2169 emit_asm(ir, TGSI_OPCODE_D2I, result_dst, op[0]);
2170 break;
2171 case ir_unop_i2d:
2172 emit_asm(ir, TGSI_OPCODE_I2D, result_dst, op[0]);
2173 break;
2174 case ir_unop_d2u:
2175 emit_asm(ir, TGSI_OPCODE_D2U, result_dst, op[0]);
2176 break;
2177 case ir_unop_u2d:
2178 emit_asm(ir, TGSI_OPCODE_U2D, result_dst, op[0]);
2179 break;
2180 case ir_unop_unpack_double_2x32:
2181 case ir_unop_pack_double_2x32:
2182 emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
2183 break;
2184
2185 case ir_binop_ldexp:
2186 if (ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE) {
2187 emit_asm(ir, TGSI_OPCODE_DLDEXP, result_dst, op[0], op[1]);
2188 } else {
2189 assert(!"Invalid ldexp for non-double opcode in glsl_to_tgsi_visitor::visit()");
2190 }
2191 break;
2192
2193 case ir_unop_pack_snorm_2x16:
2194 case ir_unop_pack_unorm_2x16:
2195 case ir_unop_pack_half_2x16:
2196 case ir_unop_pack_snorm_4x8:
2197 case ir_unop_pack_unorm_4x8:
2198
2199 case ir_unop_unpack_snorm_2x16:
2200 case ir_unop_unpack_unorm_2x16:
2201 case ir_unop_unpack_half_2x16:
2202 case ir_unop_unpack_half_2x16_split_x:
2203 case ir_unop_unpack_half_2x16_split_y:
2204 case ir_unop_unpack_snorm_4x8:
2205 case ir_unop_unpack_unorm_4x8:
2206
2207 case ir_binop_pack_half_2x16_split:
2208 case ir_binop_bfm:
2209 case ir_triop_bfi:
2210 case ir_quadop_vector:
2211 case ir_binop_vector_extract:
2212 case ir_triop_vector_insert:
2213 case ir_binop_carry:
2214 case ir_binop_borrow:
2215 /* This operation is not supported, or should have already been handled.
2216 */
2217 assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()");
2218 break;
2219 }
2220
2221 this->result = result_src;
2222 }
2223
2224
2225 void
2226 glsl_to_tgsi_visitor::visit(ir_swizzle *ir)
2227 {
2228 st_src_reg src;
2229 int i;
2230 int swizzle[4];
2231
2232 /* Note that this is only swizzles in expressions, not those on the left
2233 * hand side of an assignment, which do write masking. See ir_assignment
2234 * for that.
2235 */
2236
2237 ir->val->accept(this);
2238 src = this->result;
2239 assert(src.file != PROGRAM_UNDEFINED);
2240 assert(ir->type->vector_elements > 0);
2241
2242 for (i = 0; i < 4; i++) {
2243 if (i < ir->type->vector_elements) {
2244 switch (i) {
2245 case 0:
2246 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
2247 break;
2248 case 1:
2249 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
2250 break;
2251 case 2:
2252 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
2253 break;
2254 case 3:
2255 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
2256 break;
2257 }
2258 } else {
2259 /* If the type is smaller than a vec4, replicate the last
2260 * channel out.
2261 */
2262 swizzle[i] = swizzle[ir->type->vector_elements - 1];
2263 }
2264 }
2265
2266 src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
2267
2268 this->result = src;
2269 }
2270
/* Test if the variable is an array. Note that geometry and
 * tessellation shader inputs and outputs are always arrays (except
 * for patch inputs), so only the array element type is considered.
 */
2275 static bool
2276 is_inout_array(unsigned stage, ir_variable *var, bool *is_2d)
2277 {
2278 const glsl_type *type = var->type;
2279
2280 if ((stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_in) ||
2281 (stage == MESA_SHADER_FRAGMENT && var->data.mode == ir_var_shader_out))
2282 return false;
2283
2284 *is_2d = false;
2285
2286 if (((stage == MESA_SHADER_GEOMETRY && var->data.mode == ir_var_shader_in) ||
2287 (stage == MESA_SHADER_TESS_EVAL && var->data.mode == ir_var_shader_in) ||
2288 stage == MESA_SHADER_TESS_CTRL) &&
2289 !var->data.patch) {
2290 if (!var->type->is_array())
2291 return false; /* a system value probably */
2292
2293 type = var->type->fields.array;
2294 *is_2d = true;
2295 }
2296
2297 return type->is_array() || type->is_matrix();
2298 }
2299
/**
 * Resolve a variable dereference into this->result, creating backing
 * storage for the variable on its first use.
 */
void
glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
{
   variable_storage *entry = find_variable_storage(ir->var);
   ir_variable *var = ir->var;
   bool is_2d;

   if (!entry) {
      /* First reference: allocate storage appropriate to the variable's
       * mode.
       */
      switch (var->data.mode) {
      case ir_var_uniform:
         entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
                                               var->data.location);
         this->variables.push_tail(entry);
         break;
      case ir_var_shader_in:
         /* The linker assigns locations for varyings and attributes,
          * including deprecated builtins (like gl_Color), user-assign
          * generic attributes (glBindVertexLocation), and
          * user-defined varyings.
          */
         assert(var->data.location != -1);

         if (is_inout_array(shader->Stage, var, &is_2d)) {
            /* Arrayed input: record an array declaration so it can be
             * emitted with an array id.
             */
            struct array_decl *decl = &input_arrays[num_input_arrays];

            decl->mesa_index = var->data.location;
            decl->array_id = num_input_arrays + 1;
            if (is_2d)
               /* 2D in/outs are sized by the per-vertex element type. */
               decl->array_size = type_size(var->type->fields.array);
            else
               decl->array_size = type_size(var->type);
            num_input_arrays++;

            entry = new(mem_ctx) variable_storage(var,
                                                  PROGRAM_INPUT,
                                                  var->data.location,
                                                  decl->array_id);
         }
         else {
            entry = new(mem_ctx) variable_storage(var,
                                                  PROGRAM_INPUT,
                                                  var->data.location);
         }
         this->variables.push_tail(entry);
         break;
      case ir_var_shader_out:
         assert(var->data.location != -1);

         if (is_inout_array(shader->Stage, var, &is_2d)) {
            /* Arrayed output: mirrors the input case above. */
            struct array_decl *decl = &output_arrays[num_output_arrays];

            decl->mesa_index = var->data.location;
            decl->array_id = num_output_arrays + 1;
            if (is_2d)
               decl->array_size = type_size(var->type->fields.array);
            else
               decl->array_size = type_size(var->type);
            num_output_arrays++;

            entry = new(mem_ctx) variable_storage(var,
                                                  PROGRAM_OUTPUT,
                                                  var->data.location,
                                                  decl->array_id);
         }
         else {
            /* data.index is added to the location here — presumably the
             * dual-source blend index for FS outputs; verify against the
             * linker's location assignment.
             */
            entry = new(mem_ctx) variable_storage(var,
                                                  PROGRAM_OUTPUT,
                                                  var->data.location
                                                  + var->data.index);
         }
         this->variables.push_tail(entry);
         break;
      case ir_var_system_value:
         /* NOTE(review): unlike the other cases, this entry is not added
          * to this->variables, so storage is re-created on every
          * dereference — confirm this is intentional.
          */
         entry = new(mem_ctx) variable_storage(var,
                                               PROGRAM_SYSTEM_VALUE,
                                               var->data.location);
         break;
      case ir_var_auto:
      case ir_var_temporary:
         st_src_reg src = get_temp(var->type);

         entry = new(mem_ctx) variable_storage(var, src.file, src.index);
         this->variables.push_tail(entry);

         break;
      }

      if (!entry) {
         printf("Failed to make storage for %s\n", var->name);
         exit(1);
      }
   }

   this->result = st_src_reg(entry->file, entry->index, var->type);
   this->result.array_id = entry->array_id;
   if (!native_integers)
      /* Without native integer support, everything is carried in float
       * registers.
       */
      this->result.type = GLSL_TYPE_FLOAT;
}
2398
2399 static void
2400 shrink_array_declarations(struct array_decl *arrays, unsigned count,
2401 GLbitfield64 usage_mask,
2402 GLbitfield patch_usage_mask)
2403 {
2404 unsigned i, j;
2405
2406 /* Fix array declarations by removing unused array elements at both ends
2407 * of the arrays. For example, mat4[3] where only mat[1] is used.
2408 */
2409 for (i = 0; i < count; i++) {
2410 struct array_decl *decl = &arrays[i];
2411
2412 /* Shrink the beginning. */
2413 for (j = 0; j < decl->array_size; j++) {
2414 if (decl->mesa_index >= VARYING_SLOT_PATCH0) {
2415 if (patch_usage_mask &
2416 BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j))
2417 break;
2418 }
2419 else {
2420 if (usage_mask & BITFIELD64_BIT(decl->mesa_index+j))
2421 break;
2422 }
2423
2424 decl->mesa_index++;
2425 decl->array_size--;
2426 j--;
2427 }
2428
2429 /* Shrink the end. */
2430 for (j = decl->array_size-1; j >= 0; j--) {
2431 if (decl->mesa_index >= VARYING_SLOT_PATCH0) {
2432 if (patch_usage_mask &
2433 BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j))
2434 break;
2435 }
2436 else {
2437 if (usage_mask & BITFIELD64_BIT(decl->mesa_index+j))
2438 break;
2439 }
2440
2441 decl->array_size--;
2442 }
2443 }
2444 }
2445
/**
 * Resolve an array dereference: fold a constant index into the register
 * index, or emit address arithmetic for a variable index, on top of the
 * base register produced by visiting ir->array.
 */
void
glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
{
   ir_constant *index;
   st_src_reg src;
   int element_size = type_size(ir->type);
   bool is_2D = false;

   index = ir->array_index->constant_expression_value();

   ir->array->accept(this);
   src = this->result;

   /* The outermost index of GS/tessellation per-vertex in/outs selects
    * the second ("2D") register dimension rather than a register offset.
    */
   if (ir->array->ir_type != ir_type_dereference_array) {
      switch (this->prog->Target) {
      case GL_TESS_CONTROL_PROGRAM_NV:
         is_2D = (src.file == PROGRAM_INPUT || src.file == PROGRAM_OUTPUT) &&
                 !ir->variable_referenced()->data.patch;
         break;
      case GL_TESS_EVALUATION_PROGRAM_NV:
         is_2D = src.file == PROGRAM_INPUT &&
                 !ir->variable_referenced()->data.patch;
         break;
      case GL_GEOMETRY_PROGRAM_NV:
         is_2D = src.file == PROGRAM_INPUT;
         break;
      }
   }

   /* The 2D dimension indexes whole slots, so each element is one unit. */
   if (is_2D)
      element_size = 1;

   if (index) {
      /* Constant index: fold directly into the register index. */
      if (is_2D) {
         src.index2D = index->value.i[0];
         src.has_index2 = true;
      } else
         src.index += index->value.i[0] * element_size;
   } else {
      /* Variable index array dereference. It eats the "vec4" of the
       * base of the array and an index that offsets the TGSI register
       * index.
       */
      ir->array_index->accept(this);

      st_src_reg index_reg;

      if (element_size == 1) {
         index_reg = this->result;
      } else {
         /* Scale the index by the element size in registers. */
         index_reg = get_temp(native_integers ?
                              glsl_type::int_type : glsl_type::float_type);

         emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg),
              this->result, st_src_reg_for_type(index_reg.type, element_size));
      }

      /* If there was already a relative address register involved, add the
       * new and the old together to get the new offset.
       */
      if (!is_2D && src.reladdr != NULL) {
         st_src_reg accum_reg = get_temp(native_integers ?
                                glsl_type::int_type : glsl_type::float_type);

         emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg),
              index_reg, *src.reladdr);

         index_reg = accum_reg;
      }

      if (is_2D) {
         src.reladdr2 = ralloc(mem_ctx, st_src_reg);
         memcpy(src.reladdr2, &index_reg, sizeof(index_reg));
         src.index2D = 0;
         src.has_index2 = true;
      } else {
         src.reladdr = ralloc(mem_ctx, st_src_reg);
         memcpy(src.reladdr, &index_reg, sizeof(index_reg));
      }
   }

   /* If the type is smaller than a vec4, replicate the last channel out. */
   if (ir->type->is_scalar() || ir->type->is_vector())
      src.swizzle = swizzle_for_size(ir->type->vector_elements);
   else
      src.swizzle = SWIZZLE_NOOP;

   /* Change the register type to the element type of the array. */
   src.type = ir->type->base_type;

   this->result = src;
}
2538
2539 void
2540 glsl_to_tgsi_visitor::visit(ir_dereference_record *ir)
2541 {
2542 unsigned int i;
2543 const glsl_type *struct_type = ir->record->type;
2544 int offset = 0;
2545
2546 ir->record->accept(this);
2547
2548 for (i = 0; i < struct_type->length; i++) {
2549 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
2550 break;
2551 offset += type_size(struct_type->fields.structure[i].type);
2552 }
2553
2554 /* If the type is smaller than a vec4, replicate the last channel out. */
2555 if (ir->type->is_scalar() || ir->type->is_vector())
2556 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
2557 else
2558 this->result.swizzle = SWIZZLE_NOOP;
2559
2560 this->result.index += offset;
2561 this->result.type = ir->type->base_type;
2562 }
2563
2564 /**
2565 * We want to be careful in assignment setup to hit the actual storage
2566 * instead of potentially using a temporary like we might with the
2567 * ir_dereference handler.
2568 */
2569 static st_dst_reg
2570 get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v)
2571 {
2572 /* The LHS must be a dereference. If the LHS is a variable indexed array
2573 * access of a vector, it must be separated into a series conditional moves
2574 * before reaching this point (see ir_vec_index_to_cond_assign).
2575 */
2576 assert(ir->as_dereference());
2577 ir_dereference_array *deref_array = ir->as_dereference_array();
2578 if (deref_array) {
2579 assert(!deref_array->array->type->is_vector());
2580 }
2581
2582 /* Use the rvalue deref handler for the most part. We'll ignore
2583 * swizzles in it and write swizzles using writemask, though.
2584 */
2585 ir->accept(v);
2586 return st_dst_reg(v->result);
2587 }
2588
2589 /**
2590 * Process the condition of a conditional assignment
2591 *
2592 * Examines the condition of a conditional assignment to generate the optimal
2593 * first operand of a \c CMP instruction. If the condition is a relational
2594 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
2595 * used as the source for the \c CMP instruction. Otherwise the comparison
2596 * is processed to a boolean result, and the boolean result is used as the
2597 * operand to the CMP instruction.
2598 */
bool
glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir)
{
   ir_rvalue *src_ir = ir;
   bool negate = true;
   bool switch_order = false;

   ir_expression *const expr = ir->as_expression();

   if (native_integers) {
      if ((expr != NULL) && (expr->get_num_operands() == 2)) {
         enum glsl_base_type type = expr->operands[0]->type->base_type;
         if (type == GLSL_TYPE_INT || type == GLSL_TYPE_UINT ||
             type == GLSL_TYPE_BOOL) {
            /* Integer comparisons against zero can drop the comparison:
             * the non-zero operand itself serves as the UCMP condition.
             * For "== 0" the result arms must be swapped, since a
             * non-zero condition then selects the "else" value.
             */
            if (expr->operation == ir_binop_equal) {
               if (expr->operands[0]->is_zero()) {
                  src_ir = expr->operands[1];
                  switch_order = true;
               }
               else if (expr->operands[1]->is_zero()) {
                  src_ir = expr->operands[0];
                  switch_order = true;
               }
            }
            else if (expr->operation == ir_binop_nequal) {
               if (expr->operands[0]->is_zero()) {
                  src_ir = expr->operands[1];
               }
               else if (expr->operands[1]->is_zero()) {
                  src_ir = expr->operands[0];
               }
            }
         }
      }

      /* No sign-flip trickery in the integer path; UCMP tests non-zero. */
      src_ir->accept(this);
      return switch_order;
   }

   if ((expr != NULL) && (expr->get_num_operands() == 2)) {
      bool zero_on_left = false;

      /* Detect comparisons against a literal zero so the compared value
       * can feed CMP directly.
       */
      if (expr->operands[0]->is_zero()) {
         src_ir = expr->operands[1];
         zero_on_left = true;
      } else if (expr->operands[1]->is_zero()) {
         src_ir = expr->operands[0];
         zero_on_left = false;
      }

      /* a is -  0 +            -  0 +
       * (a <  0) T F F  ( a < 0) T F F
       * (0 <  a) F F T  (-a < 0) F F T
       * (a <= 0) T T F  (-a < 0) F F T  (swap order of other operands)
       * (0 <= a) F T T  ( a < 0) T F F  (swap order of other operands)
       * (a >  0) F F T  (-a < 0) F F T
       * (0 >  a) T F F  ( a < 0) T F F
       * (a >= 0) F T T  ( a < 0) T F F  (swap order of other operands)
       * (0 >= a) T T F  (-a < 0) F F T  (swap order of other operands)
       *
       * Note that exchanging the order of 0 and 'a' in the comparison simply
       * means that the value of 'a' should be negated.
       */
      if (src_ir != ir) {
         switch (expr->operation) {
         case ir_binop_less:
            switch_order = false;
            negate = zero_on_left;
            break;

         case ir_binop_greater:
            switch_order = false;
            negate = !zero_on_left;
            break;

         case ir_binop_lequal:
            switch_order = true;
            negate = !zero_on_left;
            break;

         case ir_binop_gequal:
            switch_order = true;
            negate = zero_on_left;
            break;

         default:
            /* This isn't the right kind of comparison afterall, so make sure
             * the whole condition is visited.
             */
            src_ir = ir;
            break;
         }
      }
   }

   src_ir->accept(this);

   /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
    * condition we produced is 0.0 or 1.0. By flipping the sign, we can
    * choose which value TGSI_OPCODE_CMP produces without an extra instruction
    * computing the condition.
    */
   if (negate)
      this->result.negate = ~this->result.negate;

   return switch_order;
}
2706
2707 void
2708 glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type *type,
2709 st_dst_reg *l, st_src_reg *r,
2710 st_src_reg *cond, bool cond_swap)
2711 {
2712 if (type->base_type == GLSL_TYPE_STRUCT) {
2713 for (unsigned int i = 0; i < type->length; i++) {
2714 emit_block_mov(ir, type->fields.structure[i].type, l, r,
2715 cond, cond_swap);
2716 }
2717 return;
2718 }
2719
2720 if (type->is_array()) {
2721 for (unsigned int i = 0; i < type->length; i++) {
2722 emit_block_mov(ir, type->fields.array, l, r, cond, cond_swap);
2723 }
2724 return;
2725 }
2726
2727 if (type->is_matrix()) {
2728 const struct glsl_type *vec_type;
2729
2730 vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
2731 type->vector_elements, 1);
2732
2733 for (int i = 0; i < type->matrix_columns; i++) {
2734 emit_block_mov(ir, vec_type, l, r, cond, cond_swap);
2735 }
2736 return;
2737 }
2738
2739 assert(type->is_scalar() || type->is_vector());
2740
2741 r->type = type->base_type;
2742 if (cond) {
2743 st_src_reg l_src = st_src_reg(*l);
2744 l_src.swizzle = swizzle_for_size(type->vector_elements);
2745
2746 if (native_integers) {
2747 emit_asm(ir, TGSI_OPCODE_UCMP, *l, *cond,
2748 cond_swap ? l_src : *r,
2749 cond_swap ? *r : l_src);
2750 } else {
2751 emit_asm(ir, TGSI_OPCODE_CMP, *l, *cond,
2752 cond_swap ? l_src : *r,
2753 cond_swap ? *r : l_src);
2754 }
2755 } else {
2756 emit_asm(ir, TGSI_OPCODE_MOV, *l, *r);
2757 }
2758 l->index++;
2759 r->index++;
2760 }
2761
/**
 * Translate an assignment: evaluate the RHS, resolve the LHS storage and
 * writemask, then emit moves (conditional if ir->condition is set).
 */
void
glsl_to_tgsi_visitor::visit(ir_assignment *ir)
{
   st_dst_reg l;
   st_src_reg r;

   ir->rhs->accept(this);
   r = this->result;

   l = get_assignment_lhs(ir->lhs, this);

   /* FINISHME: This should really set to the correct maximal writemask for each
    * FINISHME: component written (in the loops below). This case can only
    * FINISHME: occur for matrices, arrays, and structures.
    */
   if (ir->write_mask == 0) {
      assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
      l.writemask = WRITEMASK_XYZW;
   } else if (ir->lhs->type->is_scalar() &&
              !ir->lhs->type->is_double() &&
              ir->lhs->variable_referenced()->data.mode == ir_var_shader_out) {
      /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
       * FINISHME: W component of fragment shader output zero, work correctly.
       */
      l.writemask = WRITEMASK_XYZW;
   } else {
      int swizzles[4];
      int first_enabled_chan = 0;
      int rhs_chan = 0;

      l.writemask = ir->write_mask;

      /* Find the RHS channel of the first written component; unwritten
       * channels will replicate it (harmless, since they are masked).
       */
      for (int i = 0; i < 4; i++) {
         if (l.writemask & (1 << i)) {
            first_enabled_chan = GET_SWZ(r.swizzle, i);
            break;
         }
      }

      /* Swizzle a small RHS vector into the channels being written.
       *
       * glsl ir treats write_mask as dictating how many channels are
       * present on the RHS while TGSI treats write_mask as just
       * showing which channels of the vec4 RHS get written.
       */
      for (int i = 0; i < 4; i++) {
         if (l.writemask & (1 << i))
            swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
         else
            swizzles[i] = first_enabled_chan;
      }
      r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
                                swizzles[2], swizzles[3]);
   }

   assert(l.file != PROGRAM_UNDEFINED);
   assert(r.file != PROGRAM_UNDEFINED);

   if (ir->condition) {
      /* Conditional assignment: the condition (possibly sign-flipped by
       * process_move_condition) drives CMP/UCMP selects.
       */
      const bool switch_order = this->process_move_condition(ir->condition);
      st_src_reg condition = this->result;

      emit_block_mov(ir, ir->lhs->type, &l, &r, &condition, switch_order);
   } else if (ir->rhs->as_expression() &&
              this->instructions.get_tail() &&
              ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir &&
              type_size(ir->lhs->type) == 1 &&
              l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst[0].writemask) {
      /* To avoid emitting an extra MOV when assigning an expression to a
       * variable, emit the last instruction of the expression again, but
       * replace the destination register with the target of the assignment.
       * Dead code elimination will remove the original instruction.
       */
      glsl_to_tgsi_instruction *inst, *new_inst;
      inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
      new_inst = emit_asm(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2], inst->src[3]);
      new_inst->saturate = inst->saturate;
      inst->dead_mask = inst->dst[0].writemask;
   } else {
      emit_block_mov(ir, ir->rhs->type, &l, &r, NULL, false);
   }
}
2844
2845
/**
 * Translate a constant into an immediate (or, inside array constants, a
 * constant-buffer entry) and leave a register for it in this->result.
 */
void
glsl_to_tgsi_visitor::visit(ir_constant *ir)
{
   st_src_reg src;
   GLdouble stack_vals[4] = { 0 };
   gl_constant_value *values = (gl_constant_value *) stack_vals;
   GLenum gl_type = GL_NONE;
   unsigned int i;
   /* Tracks recursion into array constants across calls; elements of an
    * array constant go to PROGRAM_CONSTANT instead of immediates.
    */
   static int in_array = 0;
   gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE;

   /* Unfortunately, 4 floats is all we can get into
    * _mesa_add_typed_unnamed_constant. So, make a temp to store an
    * aggregate constant and move each constant value into it. If we
    * get lucky, copy propagation will eliminate the extra moves.
    */
   if (ir->type->base_type == GLSL_TYPE_STRUCT) {
      st_src_reg temp_base = get_temp(ir->type);
      st_dst_reg temp = st_dst_reg(temp_base);

      foreach_in_list(ir_constant, field_value, &ir->components) {
         int size = type_size(field_value->type);

         assert(size > 0);

         field_value->accept(this);
         src = this->result;

         /* Copy the field register-by-register into the temp. */
         for (i = 0; i < (unsigned int)size; i++) {
            emit_asm(ir, TGSI_OPCODE_MOV, temp, src);

            src.index++;
            temp.index++;
         }
      }
      this->result = temp_base;
      return;
   }

   if (ir->type->is_array()) {
      st_src_reg temp_base = get_temp(ir->type);
      st_dst_reg temp = st_dst_reg(temp_base);
      int size = type_size(ir->type->fields.array);

      assert(size > 0);
      in_array++;

      for (i = 0; i < ir->type->length; i++) {
         ir->array_elements[i]->accept(this);
         src = this->result;
         for (int j = 0; j < size; j++) {
            emit_asm(ir, TGSI_OPCODE_MOV, temp, src);

            src.index++;
            temp.index++;
         }
      }
      this->result = temp_base;
      in_array--;
      return;
   }

   if (ir->type->is_matrix()) {
      st_src_reg mat = get_temp(ir->type);
      st_dst_reg mat_column = st_dst_reg(mat);

      /* Emit each column as its own constant, moved into the temp. */
      for (i = 0; i < ir->type->matrix_columns; i++) {
         assert(ir->type->base_type == GLSL_TYPE_FLOAT);
         values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];

         src = st_src_reg(file, -1, ir->type->base_type);
         src.index = add_constant(file,
                                  values,
                                  ir->type->vector_elements,
                                  GL_FLOAT,
                                  &src.swizzle);
         emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);

         mat_column.index++;
      }

      this->result = mat;
      return;
   }

   /* Scalar/vector constant: pack the component values and register one
    * constant entry.
    */
   switch (ir->type->base_type) {
   case GLSL_TYPE_FLOAT:
      gl_type = GL_FLOAT;
      for (i = 0; i < ir->type->vector_elements; i++) {
         values[i].f = ir->value.f[i];
      }
      break;
   case GLSL_TYPE_DOUBLE:
      gl_type = GL_DOUBLE;
      for (i = 0; i < ir->type->vector_elements; i++) {
         /* Each double occupies two 32-bit slots.
          * NOTE(review): the uint32_t* casts type-pun a double — a
          * strict-aliasing violation; memcpy would be the safe form.
          */
         values[i * 2].i = *(uint32_t *)&ir->value.d[i];
         values[i * 2 + 1].i = *(((uint32_t *)&ir->value.d[i]) + 1);
      }
      break;
   case GLSL_TYPE_UINT:
      gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT;
      for (i = 0; i < ir->type->vector_elements; i++) {
         if (native_integers)
            values[i].u = ir->value.u[i];
         else
            values[i].f = ir->value.u[i];
      }
      break;
   case GLSL_TYPE_INT:
      gl_type = native_integers ? GL_INT : GL_FLOAT;
      for (i = 0; i < ir->type->vector_elements; i++) {
         if (native_integers)
            values[i].i = ir->value.i[i];
         else
            values[i].f = ir->value.i[i];
      }
      break;
   case GLSL_TYPE_BOOL:
      gl_type = native_integers ? GL_BOOL : GL_FLOAT;
      for (i = 0; i < ir->type->vector_elements; i++) {
         values[i].u = ir->value.b[i] ? ctx->Const.UniformBooleanTrue : 0;
      }
      break;
   default:
      assert(!"Non-float/uint/int/bool constant");
   }

   this->result = st_src_reg(file, -1, ir->type);
   this->result.index = add_constant(file,
                                     values,
                                     ir->type->vector_elements,
                                     gl_type,
                                     &this->result.swizzle);
}
2980
2981 function_entry *
2982 glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig)
2983 {
2984 foreach_in_list_use_after(function_entry, entry, &this->function_signatures) {
2985 if (entry->sig == sig)
2986 return entry;
2987 }
2988
2989 entry = ralloc(mem_ctx, function_entry);
2990 entry->sig = sig;
2991 entry->sig_id = this->next_signature_id++;
2992 entry->bgn_inst = NULL;
2993
2994 /* Allocate storage for all the parameters. */
2995 foreach_in_list(ir_variable, param, &sig->parameters) {
2996 variable_storage *storage;
2997
2998 storage = find_variable_storage(param);
2999 assert(!storage);
3000
3001 st_src_reg src = get_temp(param->type);
3002
3003 storage = new(mem_ctx) variable_storage(param, src.file, src.index);
3004 this->variables.push_tail(storage);
3005 }
3006
3007 if (!sig->return_type->is_void()) {
3008 entry->return_reg = get_temp(sig->return_type);
3009 } else {
3010 entry->return_reg = undef_src;
3011 }
3012
3013 this->function_signatures.push_tail(entry);
3014 return entry;
3015 }
3016
3017 void
3018 glsl_to_tgsi_visitor::visit(ir_call *ir)
3019 {
3020 glsl_to_tgsi_instruction *call_inst;
3021 ir_function_signature *sig = ir->callee;
3022 function_entry *entry = get_function_signature(sig);
3023 int i;
3024
3025 /* Process in parameters. */
3026 foreach_two_lists(formal_node, &sig->parameters,
3027 actual_node, &ir->actual_parameters) {
3028 ir_rvalue *param_rval = (ir_rvalue *) actual_node;
3029 ir_variable *param = (ir_variable *) formal_node;
3030
3031 if (param->data.mode == ir_var_function_in ||
3032 param->data.mode == ir_var_function_inout) {
3033 variable_storage *storage = find_variable_storage(param);
3034 assert(storage);
3035
3036 param_rval->accept(this);
3037 st_src_reg r = this->result;
3038
3039 st_dst_reg l;
3040 l.file = storage->file;
3041 l.index = storage->index;
3042 l.reladdr = NULL;
3043 l.writemask = WRITEMASK_XYZW;
3044 l.cond_mask = COND_TR;
3045
3046 for (i = 0; i < type_size(param->type); i++) {
3047 emit_asm(ir, TGSI_OPCODE_MOV, l, r);
3048 l.index++;
3049 r.index++;
3050 }
3051 }
3052 }
3053
3054 /* Emit call instruction */
3055 call_inst = emit_asm(ir, TGSI_OPCODE_CAL);
3056 call_inst->function = entry;
3057
3058 /* Process out parameters. */
3059 foreach_two_lists(formal_node, &sig->parameters,
3060 actual_node, &ir->actual_parameters) {
3061 ir_rvalue *param_rval = (ir_rvalue *) actual_node;
3062 ir_variable *param = (ir_variable *) formal_node;
3063
3064 if (param->data.mode == ir_var_function_out ||
3065 param->data.mode == ir_var_function_inout) {
3066 variable_storage *storage = find_variable_storage(param);
3067 assert(storage);
3068
3069 st_src_reg r;
3070 r.file = storage->file;
3071 r.index = storage->index;
3072 r.reladdr = NULL;
3073 r.swizzle = SWIZZLE_NOOP;
3074 r.negate = 0;
3075
3076 param_rval->accept(this);
3077 st_dst_reg l = st_dst_reg(this->result);
3078
3079 for (i = 0; i < type_size(param->type); i++) {
3080 emit_asm(ir, TGSI_OPCODE_MOV, l, r);
3081 l.index++;
3082 r.index++;
3083 }
3084 }
3085 }
3086
3087 /* Process return value. */
3088 this->result = entry->return_reg;
3089 }
3090
/**
 * Translate an ir_texture operation (sample, fetch, gather, size/LOD
 * query) into the corresponding TGSI texture opcode.
 *
 * TGSI packs shadow comparators, LOD/bias values and the multisample
 * index into spare channels of the coordinate vector, so the coordinate
 * is first copied into a temp and then patched channel-by-channel below.
 * The result is always written to a fresh temp, left in this->result.
 */
void
glsl_to_tgsi_visitor::visit(ir_texture *ir)
{
   st_src_reg result_src, coord, cube_sc, lod_info, projector, dx, dy;
   st_src_reg offset[MAX_GLSL_TEXTURE_OFFSET], sample_index, component;
   st_src_reg levels_src;
   st_dst_reg result_dst, coord_dst, cube_sc_dst;
   glsl_to_tgsi_instruction *inst = NULL;
   unsigned opcode = TGSI_OPCODE_NOP;
   const glsl_type *sampler_type = ir->sampler->type;
   /* Non-NULL when the sampler is indexed by a non-constant expression;
    * handled via ARL into sampler_reladdr further down. */
   ir_rvalue *sampler_index =
      _mesa_get_sampler_array_nonconst_index(ir->sampler);
   bool is_cube_array = false;
   unsigned i;

   /* if we are a cube array sampler */
   if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
        sampler_type->sampler_array)) {
      is_cube_array = true;
   }

   if (ir->coordinate) {
      ir->coordinate->accept(this);

      /* Put our coords in a temp.  We'll need to modify them for shadow,
       * projection, or LOD, so the only case we'd use it as is is if
       * we're doing plain old texturing.  The optimization passes on
       * glsl_to_tgsi_visitor should handle cleaning up our mess in that case.
       */
      coord = get_temp(glsl_type::vec4_type);
      coord_dst = st_dst_reg(coord);
      coord_dst.writemask = (1 << ir->coordinate->type->vector_elements) - 1;
      emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
   }

   if (ir->projector) {
      ir->projector->accept(this);
      projector = this->result;
   }

   /* Storage for our result.  Ideally for an assignment we'd be using
    * the actual storage for the result here, instead.
    */
   result_src = get_temp(ir->type);
   result_dst = st_dst_reg(result_src);

   /* Pick the opcode and evaluate any per-op extra operands
    * (bias, LOD, gradients, sample index, gather component, offsets). */
   switch (ir->op) {
   case ir_tex:
      /* Shadow cube arrays need TEX2: the comparator doesn't fit in the
       * coordinate, so it goes in a second source. */
      opcode = (is_cube_array && ir->shadow_comparitor) ? TGSI_OPCODE_TEX2 : TGSI_OPCODE_TEX;
      if (ir->offset) {
         ir->offset->accept(this);
         offset[0] = this->result;
      }
      break;
   case ir_txb:
      if (is_cube_array ||
          sampler_type == glsl_type::samplerCubeShadow_type) {
         opcode = TGSI_OPCODE_TXB2;
      }
      else {
         opcode = TGSI_OPCODE_TXB;
      }
      ir->lod_info.bias->accept(this);
      lod_info = this->result;
      if (ir->offset) {
         ir->offset->accept(this);
         offset[0] = this->result;
      }
      break;
   case ir_txl:
      opcode = is_cube_array ? TGSI_OPCODE_TXL2 : TGSI_OPCODE_TXL;
      ir->lod_info.lod->accept(this);
      lod_info = this->result;
      if (ir->offset) {
         ir->offset->accept(this);
         offset[0] = this->result;
      }
      break;
   case ir_txd:
      opcode = TGSI_OPCODE_TXD;
      ir->lod_info.grad.dPdx->accept(this);
      dx = this->result;
      ir->lod_info.grad.dPdy->accept(this);
      dy = this->result;
      if (ir->offset) {
         ir->offset->accept(this);
         offset[0] = this->result;
      }
      break;
   case ir_txs:
      opcode = TGSI_OPCODE_TXQ;
      ir->lod_info.lod->accept(this);
      lod_info = this->result;
      break;
   case ir_query_levels:
      /* Also TXQ; the level count is extracted from the W channel of the
       * TXQ result below. */
      opcode = TGSI_OPCODE_TXQ;
      lod_info = undef_src;
      levels_src = get_temp(ir->type);
      break;
   case ir_txf:
      opcode = TGSI_OPCODE_TXF;
      ir->lod_info.lod->accept(this);
      lod_info = this->result;
      if (ir->offset) {
         ir->offset->accept(this);
         offset[0] = this->result;
      }
      break;
   case ir_txf_ms:
      opcode = TGSI_OPCODE_TXF;
      ir->lod_info.sample_index->accept(this);
      sample_index = this->result;
      break;
   case ir_tg4:
      opcode = TGSI_OPCODE_TG4;
      ir->lod_info.component->accept(this);
      component = this->result;
      if (ir->offset) {
         ir->offset->accept(this);
         /* textureGatherOffsets() passes an array of offsets; expand it
          * into per-element entries of offset[]. */
         if (ir->offset->type->base_type == GLSL_TYPE_ARRAY) {
            const glsl_type *elt_type = ir->offset->type->fields.array;
            for (i = 0; i < ir->offset->type->length; i++) {
               offset[i] = this->result;
               offset[i].index += i * type_size(elt_type);
               offset[i].type = elt_type->base_type;
               offset[i].swizzle = swizzle_for_size(elt_type->vector_elements);
            }
         } else {
            offset[0] = this->result;
         }
      }
      break;
   case ir_lod:
      opcode = TGSI_OPCODE_LODQ;
      break;
   }

   if (ir->projector) {
      if (opcode == TGSI_OPCODE_TEX) {
         /* Slot the projector in as the last component of the coord. */
         coord_dst.writemask = WRITEMASK_W;
         emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, projector);
         coord_dst.writemask = WRITEMASK_XYZW;
         opcode = TGSI_OPCODE_TXP;
      } else {
         st_src_reg coord_w = coord;
         coord_w.swizzle = SWIZZLE_WWWW;

         /* For the other TEX opcodes there's no projective version
          * since the last slot is taken up by LOD info.  Do the
          * projective divide now.
          */
         coord_dst.writemask = WRITEMASK_W;
         emit_asm(ir, TGSI_OPCODE_RCP, coord_dst, projector);

         /* In the case where we have to project the coordinates "by hand,"
          * the shadow comparator value must also be projected.
          */
         st_src_reg tmp_src = coord;
         if (ir->shadow_comparitor) {
            /* Slot the shadow value in as the second to last component of the
             * coord.
             */
            ir->shadow_comparitor->accept(this);

            tmp_src = get_temp(glsl_type::vec4_type);
            st_dst_reg tmp_dst = st_dst_reg(tmp_src);

            /* Projective division not allowed for array samplers. */
            assert(!sampler_type->sampler_array);

            tmp_dst.writemask = WRITEMASK_Z;
            emit_asm(ir, TGSI_OPCODE_MOV, tmp_dst, this->result);

            tmp_dst.writemask = WRITEMASK_XY;
            emit_asm(ir, TGSI_OPCODE_MOV, tmp_dst, coord);
         }

         /* coord.xyz *= 1/coord.w (the RCP result stored in W above). */
         coord_dst.writemask = WRITEMASK_XYZ;
         emit_asm(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w);

         coord_dst.writemask = WRITEMASK_XYZW;
         coord.swizzle = SWIZZLE_XYZW;
      }
   }

   /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow
    * comparator was put in the correct place (and projected) by the code,
    * above, that handles by-hand projection.
    */
   if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) {
      /* Slot the shadow value in as the second to last component of the
       * coord.
       */
      ir->shadow_comparitor->accept(this);

      if (is_cube_array) {
         /* Cube arrays use all four coord channels, so the comparator
          * travels in a separate register (consumed as TEX2/TG4 src). */
         cube_sc = get_temp(glsl_type::float_type);
         cube_sc_dst = st_dst_reg(cube_sc);
         cube_sc_dst.writemask = WRITEMASK_X;
         emit_asm(ir, TGSI_OPCODE_MOV, cube_sc_dst, this->result);
         /* NOTE(review): writemask was already set to X two lines above;
          * this repeated assignment looks redundant — confirm. */
         cube_sc_dst.writemask = WRITEMASK_X;
      }
      else {
         /* The comparator goes in the first coord channel the sampler
          * doesn't use: W for 2D-array/cube, Z otherwise. */
         if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D &&
              sampler_type->sampler_array) ||
             sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) {
            coord_dst.writemask = WRITEMASK_W;
         } else {
            coord_dst.writemask = WRITEMASK_Z;
         }
         emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
         coord_dst.writemask = WRITEMASK_XYZW;
      }
   }

   if (ir->op == ir_txf_ms) {
      /* The sample index rides in the W channel of the coordinate. */
      coord_dst.writemask = WRITEMASK_W;
      emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, sample_index);
      coord_dst.writemask = WRITEMASK_XYZW;
   } else if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB ||
              opcode == TGSI_OPCODE_TXF) {
      /* TGSI stores LOD or LOD bias in the last channel of the coords. */
      coord_dst.writemask = WRITEMASK_W;
      emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, lod_info);
      coord_dst.writemask = WRITEMASK_XYZW;
   }

   if (sampler_index) {
      /* Dynamically-indexed sampler: load the index into the address reg. */
      sampler_index->accept(this);
      emit_arl(ir, sampler_reladdr, this->result);
   }

   /* Emit the texture instruction itself, with the operand layout each
    * opcode expects. */
   if (opcode == TGSI_OPCODE_TXD)
      inst = emit_asm(ir, opcode, result_dst, coord, dx, dy);
   else if (opcode == TGSI_OPCODE_TXQ) {
      if (ir->op == ir_query_levels) {
         /* the level is stored in W */
         inst = emit_asm(ir, opcode, st_dst_reg(levels_src), lod_info);
         result_dst.writemask = WRITEMASK_X;
         levels_src.swizzle = SWIZZLE_WWWW;
         emit_asm(ir, TGSI_OPCODE_MOV, result_dst, levels_src);
      } else
         inst = emit_asm(ir, opcode, result_dst, lod_info);
   } else if (opcode == TGSI_OPCODE_TXF) {
      inst = emit_asm(ir, opcode, result_dst, coord);
   } else if (opcode == TGSI_OPCODE_TXL2 || opcode == TGSI_OPCODE_TXB2) {
      inst = emit_asm(ir, opcode, result_dst, coord, lod_info);
   } else if (opcode == TGSI_OPCODE_TEX2) {
      inst = emit_asm(ir, opcode, result_dst, coord, cube_sc);
   } else if (opcode == TGSI_OPCODE_TG4) {
      if (is_cube_array && ir->shadow_comparitor) {
         inst = emit_asm(ir, opcode, result_dst, coord, cube_sc);
      } else {
         inst = emit_asm(ir, opcode, result_dst, coord, component);
      }
   } else
      inst = emit_asm(ir, opcode, result_dst, coord);

   if (ir->shadow_comparitor)
      inst->tex_shadow = GL_TRUE;

   inst->sampler.index = _mesa_get_sampler_uniform_value(ir->sampler,
                                                         this->shader_program,
                                                         this->prog);
   if (sampler_index) {
      inst->sampler.reladdr = ralloc(mem_ctx, st_src_reg);
      memcpy(inst->sampler.reladdr, &sampler_reladdr, sizeof(sampler_reladdr));
      inst->sampler_array_size =
         ir->sampler->as_dereference_array()->array->type->array_size();
   } else {
      inst->sampler_array_size = 1;
   }

   if (ir->offset) {
      /* Copy however many consecutive offsets were filled in above. */
      for (i = 0; i < MAX_GLSL_TEXTURE_OFFSET && offset[i].file != PROGRAM_UNDEFINED; i++)
         inst->tex_offsets[i] = offset[i];
      inst->tex_offset_num_offset = i;
   }

   /* Map the GLSL sampler dimensionality to a Mesa texture target. */
   switch (sampler_type->sampler_dimensionality) {
   case GLSL_SAMPLER_DIM_1D:
      inst->tex_target = (sampler_type->sampler_array)
         ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
      break;
   case GLSL_SAMPLER_DIM_2D:
      inst->tex_target = (sampler_type->sampler_array)
         ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
      break;
   case GLSL_SAMPLER_DIM_3D:
      inst->tex_target = TEXTURE_3D_INDEX;
      break;
   case GLSL_SAMPLER_DIM_CUBE:
      inst->tex_target = (sampler_type->sampler_array)
         ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX;
      break;
   case GLSL_SAMPLER_DIM_RECT:
      inst->tex_target = TEXTURE_RECT_INDEX;
      break;
   case GLSL_SAMPLER_DIM_BUF:
      inst->tex_target = TEXTURE_BUFFER_INDEX;
      break;
   case GLSL_SAMPLER_DIM_EXTERNAL:
      inst->tex_target = TEXTURE_EXTERNAL_INDEX;
      break;
   case GLSL_SAMPLER_DIM_MS:
      inst->tex_target = (sampler_type->sampler_array)
         ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX;
      break;
   default:
      assert(!"Should not get here.");
   }

   inst->tex_type = ir->type->base_type;

   this->result = result_src;
}
3408
3409 void
3410 glsl_to_tgsi_visitor::visit(ir_return *ir)
3411 {
3412 if (ir->get_value()) {
3413 st_dst_reg l;
3414 int i;
3415
3416 assert(current_function);
3417
3418 ir->get_value()->accept(this);
3419 st_src_reg r = this->result;
3420
3421 l = st_dst_reg(current_function->return_reg);
3422
3423 for (i = 0; i < type_size(current_function->sig->return_type); i++) {
3424 emit_asm(ir, TGSI_OPCODE_MOV, l, r);
3425 l.index++;
3426 r.index++;
3427 }
3428 }
3429
3430 emit_asm(ir, TGSI_OPCODE_RET);
3431 }
3432
3433 void
3434 glsl_to_tgsi_visitor::visit(ir_discard *ir)
3435 {
3436 if (ir->condition) {
3437 ir->condition->accept(this);
3438 st_src_reg condition = this->result;
3439
3440 /* Convert the bool condition to a float so we can negate. */
3441 if (native_integers) {
3442 st_src_reg temp = get_temp(ir->condition->type);
3443 emit_asm(ir, TGSI_OPCODE_AND, st_dst_reg(temp),
3444 condition, st_src_reg_for_float(1.0));
3445 condition = temp;
3446 }
3447
3448 condition.negate = ~condition.negate;
3449 emit_asm(ir, TGSI_OPCODE_KILL_IF, undef_dst, condition);
3450 } else {
3451 /* unconditional kil */
3452 emit_asm(ir, TGSI_OPCODE_KILL);
3453 }
3454 }
3455
/**
 * Translate an ir_if into a TGSI IF/UIF ... [ELSE ...] ENDIF sequence.
 */
void
glsl_to_tgsi_visitor::visit(ir_if *ir)
{
   unsigned if_opcode;
   glsl_to_tgsi_instruction *if_inst;

   /* Evaluate the condition into this->result. */
   ir->condition->accept(this);
   assert(this->result.file != PROGRAM_UNDEFINED);

   /* UIF tests an integer condition, IF a float one. */
   if_opcode = native_integers ? TGSI_OPCODE_UIF : TGSI_OPCODE_IF;

   if_inst = emit_asm(ir->condition, if_opcode, undef_dst, this->result);

   /* NOTE(review): if emit_asm() already appends the instruction to
    * this->instructions, this push_tail would re-insert the same node —
    * confirm against emit_asm's implementation. */
   this->instructions.push_tail(if_inst);

   visit_exec_list(&ir->then_instructions, this);

   if (!ir->else_instructions.is_empty()) {
      emit_asm(ir->condition, TGSI_OPCODE_ELSE);
      visit_exec_list(&ir->else_instructions, this);
   }

   if_inst = emit_asm(ir->condition, TGSI_OPCODE_ENDIF);
}
3480
3481
3482 void
3483 glsl_to_tgsi_visitor::visit(ir_emit_vertex *ir)
3484 {
3485 assert(this->prog->Target == GL_GEOMETRY_PROGRAM_NV);
3486
3487 ir->stream->accept(this);
3488 emit_asm(ir, TGSI_OPCODE_EMIT, undef_dst, this->result);
3489 }
3490
3491 void
3492 glsl_to_tgsi_visitor::visit(ir_end_primitive *ir)
3493 {
3494 assert(this->prog->Target == GL_GEOMETRY_PROGRAM_NV);
3495
3496 ir->stream->accept(this);
3497 emit_asm(ir, TGSI_OPCODE_ENDPRIM, undef_dst, this->result);
3498 }
3499
3500 void
3501 glsl_to_tgsi_visitor::visit(ir_barrier *ir)
3502 {
3503 assert(this->prog->Target == GL_TESS_CONTROL_PROGRAM_NV ||
3504 this->prog->Target == GL_COMPUTE_PROGRAM_NV);
3505
3506 emit_asm(ir, TGSI_OPCODE_BARRIER);
3507 }
3508
3509 glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
3510 {
3511 result.file = PROGRAM_UNDEFINED;
3512 next_temp = 1;
3513 array_sizes = NULL;
3514 max_num_arrays = 0;
3515 next_array = 0;
3516 num_input_arrays = 0;
3517 num_output_arrays = 0;
3518 next_signature_id = 1;
3519 num_immediates = 0;
3520 current_function = NULL;
3521 num_address_regs = 0;
3522 samplers_used = 0;
3523 indirect_addr_consts = false;
3524 wpos_transform_const = -1;
3525 glsl_version = 0;
3526 native_integers = false;
3527 mem_ctx = ralloc_context(NULL);
3528 ctx = NULL;
3529 prog = NULL;
3530 shader_program = NULL;
3531 shader = NULL;
3532 options = NULL;
3533 have_sqrt = false;
3534 have_fma = false;
3535 }
3536
glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
{
   /* array_sizes is grown with realloc(), not ralloc, so free() it. */
   free(array_sizes);
   /* Releases everything allocated out of mem_ctx. */
   ralloc_free(mem_ctx);
}
3542
/* C-callable wrapper so non-C++ code can destroy a visitor. */
extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v)
{
   delete v;
}
3547
3548
3549 /**
3550 * Count resources used by the given gpu program (number of texture
3551 * samplers, etc).
3552 */
3553 static void
3554 count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
3555 {
3556 v->samplers_used = 0;
3557
3558 foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) {
3559 if (inst->info->is_tex) {
3560 for (int i = 0; i < inst->sampler_array_size; i++) {
3561 unsigned idx = inst->sampler.index + i;
3562 v->samplers_used |= 1 << idx;
3563
3564 debug_assert(idx < (int)ARRAY_SIZE(v->sampler_types));
3565 v->sampler_types[idx] = inst->tex_type;
3566 v->sampler_targets[idx] =
3567 st_translate_texture_target(inst->tex_target, inst->tex_shadow);
3568
3569 if (inst->tex_shadow) {
3570 prog->ShadowSamplers |= 1 << (inst->sampler.index + i);
3571 }
3572 }
3573 }
3574 }
3575 prog->SamplersUsed = v->samplers_used;
3576
3577 if (v->shader_program != NULL)
3578 _mesa_update_shader_textures_used(v->shader_program, prog);
3579 }
3580
3581 /**
3582 * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
3583 * are read from the given src in this instruction
3584 */
3585 static int
3586 get_src_arg_mask(st_dst_reg dst, st_src_reg src)
3587 {
3588 int read_mask = 0, comp;
3589
3590 /* Now, given the src swizzle and the written channels, find which
3591 * components are actually read
3592 */
3593 for (comp = 0; comp < 4; ++comp) {
3594 const unsigned coord = GET_SWZ(src.swizzle, comp);
3595 assert(coord < 4);
3596 if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W)
3597 read_mask |= 1 << coord;
3598 }
3599
3600 return read_mask;
3601 }
3602
3603 /**
3604 * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
3605 * instruction is the first instruction to write to register T0. There are
3606 * several lowering passes done in GLSL IR (e.g. branches and
3607 * relative addressing) that create a large number of conditional assignments
3608 * that ir_to_mesa converts to CMP instructions like the one mentioned above.
3609 *
3610 * Here is why this conversion is safe:
3611 * CMP T0, T1 T2 T0 can be expanded to:
3612 * if (T1 < 0.0)
3613 * MOV T0, T2;
3614 * else
3615 * MOV T0, T0;
3616 *
3617 * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
3618 * as the original program. If (T1 < 0.0) evaluates to false, executing
3619 * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
3620 * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
3621 * because any instruction that was going to read from T0 after this was going
3622 * to read a garbage value anyway.
3623 */
3624 void
3625 glsl_to_tgsi_visitor::simplify_cmp(void)
3626 {
3627 int tempWritesSize = 0;
3628 unsigned *tempWrites = NULL;
3629 unsigned outputWrites[VARYING_SLOT_TESS_MAX];
3630
3631 memset(outputWrites, 0, sizeof(outputWrites));
3632
3633 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
3634 unsigned prevWriteMask = 0;
3635
3636 /* Give up if we encounter relative addressing or flow control. */
3637 if (inst->dst[0].reladdr || inst->dst[0].reladdr2 ||
3638 inst->dst[1].reladdr || inst->dst[1].reladdr2 ||
3639 tgsi_get_opcode_info(inst->op)->is_branch ||
3640 inst->op == TGSI_OPCODE_BGNSUB ||
3641 inst->op == TGSI_OPCODE_CONT ||
3642 inst->op == TGSI_OPCODE_END ||
3643 inst->op == TGSI_OPCODE_ENDSUB ||
3644 inst->op == TGSI_OPCODE_RET) {
3645 break;
3646 }
3647
3648 if (inst->dst[0].file == PROGRAM_OUTPUT) {
3649 assert(inst->dst[0].index < (signed)ARRAY_SIZE(outputWrites));
3650 prevWriteMask = outputWrites[inst->dst[0].index];
3651 outputWrites[inst->dst[0].index] |= inst->dst[0].writemask;
3652 } else if (inst->dst[0].file == PROGRAM_TEMPORARY) {
3653 if (inst->dst[0].index >= tempWritesSize) {
3654 const int inc = 4096;
3655
3656 tempWrites = (unsigned*)
3657 realloc(tempWrites,
3658 (tempWritesSize + inc) * sizeof(unsigned));
3659 if (!tempWrites)
3660 return;
3661
3662 memset(tempWrites + tempWritesSize, 0, inc * sizeof(unsigned));
3663 tempWritesSize += inc;
3664 }
3665
3666 prevWriteMask = tempWrites[inst->dst[0].index];
3667 tempWrites[inst->dst[0].index] |= inst->dst[0].writemask;
3668 } else
3669 continue;
3670
3671 /* For a CMP to be considered a conditional write, the destination
3672 * register and source register two must be the same. */
3673 if (inst->op == TGSI_OPCODE_CMP
3674 && !(inst->dst[0].writemask & prevWriteMask)
3675 && inst->src[2].file == inst->dst[0].file
3676 && inst->src[2].index == inst->dst[0].index
3677 && inst->dst[0].writemask == get_src_arg_mask(inst->dst[0], inst->src[2])) {
3678
3679 inst->op = TGSI_OPCODE_MOV;
3680 inst->src[0] = inst->src[1];
3681 }
3682 }
3683
3684 free(tempWrites);
3685 }
3686
3687 /* Replaces all references to a temporary register index with another index. */
3688 void
3689 glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
3690 {
3691 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
3692 unsigned j;
3693
3694 for (j = 0; j < num_inst_src_regs(inst); j++) {
3695 if (inst->src[j].file == PROGRAM_TEMPORARY &&
3696 inst->src[j].index == index) {
3697 inst->src[j].index = new_index;
3698 }
3699 }
3700
3701 for (j = 0; j < inst->tex_offset_num_offset; j++) {
3702 if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY &&
3703 inst->tex_offsets[j].index == index) {
3704 inst->tex_offsets[j].index = new_index;
3705 }
3706 }
3707
3708 for (j = 0; j < num_inst_dst_regs(inst); j++) {
3709 if (inst->dst[j].file == PROGRAM_TEMPORARY && inst->dst[j].index == index) {
3710 inst->dst[j].index = new_index;
3711 }
3712 }
3713 }
3714 }
3715
3716 void
3717 glsl_to_tgsi_visitor::get_first_temp_read(int *first_reads)
3718 {
3719 int depth = 0; /* loop depth */
3720 int loop_start = -1; /* index of the first active BGNLOOP (if any) */
3721 unsigned i = 0, j;
3722
3723 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
3724 for (j = 0; j < num_inst_src_regs(inst); j++) {
3725 if (inst->src[j].file == PROGRAM_TEMPORARY) {
3726 if (first_reads[inst->src[j].index] == -1)
3727 first_reads[inst->src[j].index] = (depth == 0) ? i : loop_start;
3728 }
3729 }
3730 for (j = 0; j < inst->tex_offset_num_offset; j++) {
3731 if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) {
3732 if (first_reads[inst->tex_offsets[j].index] == -1)
3733 first_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : loop_start;
3734 }
3735 }
3736 if (inst->op == TGSI_OPCODE_BGNLOOP) {
3737 if(depth++ == 0)
3738 loop_start = i;
3739 } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
3740 if (--depth == 0)
3741 loop_start = -1;
3742 }
3743 assert(depth >= 0);
3744 i++;
3745 }
3746 }
3747
3748 void
3749 glsl_to_tgsi_visitor::get_last_temp_read_first_temp_write(int *last_reads, int *first_writes)
3750 {
3751 int depth = 0; /* loop depth */
3752 int loop_start = -1; /* index of the first active BGNLOOP (if any) */
3753 unsigned i = 0, j;
3754 int k;
3755 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
3756 for (j = 0; j < num_inst_src_regs(inst); j++) {
3757 if (inst->src[j].file == PROGRAM_TEMPORARY)
3758 last_reads[inst->src[j].index] = (depth == 0) ? i : -2;
3759 }
3760 for (j = 0; j < num_inst_dst_regs(inst); j++) {
3761 if (inst->dst[j].file == PROGRAM_TEMPORARY)
3762 if (first_writes[inst->dst[j].index] == -1)
3763 first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start;
3764 }
3765 for (j = 0; j < inst->tex_offset_num_offset; j++) {
3766 if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY)
3767 last_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : -2;
3768 }
3769 if (inst->op == TGSI_OPCODE_BGNLOOP) {
3770 if(depth++ == 0)
3771 loop_start = i;
3772 } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
3773 if (--depth == 0) {
3774 loop_start = -1;
3775 for (k = 0; k < this->next_temp; k++) {
3776 if (last_reads[k] == -2) {
3777 last_reads[k] = i;
3778 }
3779 }
3780 }
3781 }
3782 assert(depth >= 0);
3783 i++;
3784 }
3785 }
3786
3787 void
3788 glsl_to_tgsi_visitor::get_last_temp_write(int *last_writes)
3789 {
3790 int depth = 0; /* loop depth */
3791 int i = 0, k;
3792 unsigned j;
3793
3794 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
3795 for (j = 0; j < num_inst_dst_regs(inst); j++) {
3796 if (inst->dst[j].file == PROGRAM_TEMPORARY)
3797 last_writes[inst->dst[j].index] = (depth == 0) ? i : -2;
3798 }
3799
3800 if (inst->op == TGSI_OPCODE_BGNLOOP)
3801 depth++;
3802 else if (inst->op == TGSI_OPCODE_ENDLOOP)
3803 if (--depth == 0) {
3804 for (k = 0; k < this->next_temp; k++) {
3805 if (last_writes[k] == -2) {
3806 last_writes[k] = i;
3807 }
3808 }
3809 }
3810 assert(depth >= 0);
3811 i++;
3812 }
3813 }
3814
/*
 * On a basic block basis, tracks available PROGRAM_TEMPORARY register
 * channels for copy propagation and updates following instructions to
 * use the original versions.
 *
 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
 * will occur.  As an example, a TXP production before this pass:
 *
 * 0: MOV TEMP[1], INPUT[4].xyyy;
 * 1: MOV TEMP[1].w, INPUT[4].wwww;
 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
 *
 * and after:
 *
 * 0: MOV TEMP[1], INPUT[4].xyyy;
 * 1: MOV TEMP[1].w, INPUT[4].wwww;
 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
 *
 * which allows for dead code elimination on TEMP[1]'s writes.
 */
void
glsl_to_tgsi_visitor::copy_propagate(void)
{
   /* acp[4 * temp + chan] points at the MOV that most recently wrote that
    * temp channel ("available copy"); acp_level records the if-nesting
    * level at which the copy was recorded.
    */
   glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
                                                  glsl_to_tgsi_instruction *,
                                                  this->next_temp * 4);
   int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
   int level = 0;

   foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
      assert(inst->dst[0].file != PROGRAM_TEMPORARY
             || inst->dst[0].index < this->next_temp);

      /* First, do any copy propagation possible into the src regs. */
      for (int r = 0; r < 3; r++) {
         glsl_to_tgsi_instruction *first = NULL;
         bool good = true;
         int acp_base = inst->src[r].index * 4;

         if (inst->src[r].file != PROGRAM_TEMPORARY ||
             inst->src[r].reladdr ||
             inst->src[r].reladdr2)
            continue;

         /* See if we can find entries in the ACP consisting of MOVs
          * from the same src register for all the swizzled channels
          * of this src register reference.
          */
         for (int i = 0; i < 4; i++) {
            int src_chan = GET_SWZ(inst->src[r].swizzle, i);
            glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];

            if (!copy_chan) {
               good = false;
               break;
            }

            assert(acp_level[acp_base + src_chan] <= level);

            if (!first) {
               first = copy_chan;
            } else {
               /* All four channels must come from the same source
                * register for the substitution to be expressible as a
                * single swizzled reference. */
               if (first->src[0].file != copy_chan->src[0].file ||
                   first->src[0].index != copy_chan->src[0].index ||
                   first->src[0].double_reg2 != copy_chan->src[0].double_reg2 ||
                   first->src[0].index2D != copy_chan->src[0].index2D) {
                  good = false;
                  break;
               }
            }
         }

         if (good) {
            /* We've now validated that we can copy-propagate to
             * replace this src register reference.  Do it.
             */
            inst->src[r].file = first->src[0].file;
            inst->src[r].index = first->src[0].index;
            inst->src[r].index2D = first->src[0].index2D;
            inst->src[r].has_index2 = first->src[0].has_index2;
            inst->src[r].double_reg2 = first->src[0].double_reg2;
            inst->src[r].array_id = first->src[0].array_id;

            /* Compose the original swizzle with each copy's swizzle. */
            int swizzle = 0;
            for (int i = 0; i < 4; i++) {
               int src_chan = GET_SWZ(inst->src[r].swizzle, i);
               glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan];
               swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) << (3 * i));
            }
            inst->src[r].swizzle = swizzle;
         }
      }

      /* Second, update the ACP to reflect this instruction's effect on
       * control flow and register contents. */
      switch (inst->op) {
      case TGSI_OPCODE_BGNLOOP:
      case TGSI_OPCODE_ENDLOOP:
         /* End of a basic block, clear the ACP entirely. */
         memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
         break;

      case TGSI_OPCODE_IF:
      case TGSI_OPCODE_UIF:
         ++level;
         break;

      case TGSI_OPCODE_ENDIF:
      case TGSI_OPCODE_ELSE:
         /* Clear all channels written inside the block from the ACP, but
          * leaving those that were not touched.
          */
         for (int r = 0; r < this->next_temp; r++) {
            for (int c = 0; c < 4; c++) {
               if (!acp[4 * r + c])
                  continue;

               if (acp_level[4 * r + c] >= level)
                  acp[4 * r + c] = NULL;
            }
         }
         if (inst->op == TGSI_OPCODE_ENDIF)
            --level;
         break;

      default:
         /* Continuing the block, clear any written channels from
          * the ACP.
          */
         for (int d = 0; d < 2; d++) {
            if (inst->dst[d].file == PROGRAM_TEMPORARY && inst->dst[d].reladdr) {
               /* Any temporary might be written, so no copy propagation
                * across this instruction.
                */
               memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
            } else if (inst->dst[d].file == PROGRAM_OUTPUT &&
                       inst->dst[d].reladdr) {
               /* Any output might be written, so no copy propagation
                * from outputs across this instruction.
                */
               for (int r = 0; r < this->next_temp; r++) {
                  for (int c = 0; c < 4; c++) {
                     if (!acp[4 * r + c])
                        continue;

                     if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
                        acp[4 * r + c] = NULL;
                  }
               }
            } else if (inst->dst[d].file == PROGRAM_TEMPORARY ||
                       inst->dst[d].file == PROGRAM_OUTPUT) {
               /* Clear where it's used as dst. */
               if (inst->dst[d].file == PROGRAM_TEMPORARY) {
                  for (int c = 0; c < 4; c++) {
                     if (inst->dst[d].writemask & (1 << c))
                        acp[4 * inst->dst[d].index + c] = NULL;
                  }
               }

               /* Clear where it's used as src: any recorded copy whose
                * source channel is overwritten here is no longer valid. */
               for (int r = 0; r < this->next_temp; r++) {
                  for (int c = 0; c < 4; c++) {
                     if (!acp[4 * r + c])
                        continue;

                     int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);

                     if (acp[4 * r + c]->src[0].file == inst->dst[d].file &&
                         acp[4 * r + c]->src[0].index == inst->dst[d].index &&
                         inst->dst[d].writemask & (1 << src_chan)) {
                        acp[4 * r + c] = NULL;
                     }
                  }
               }
            }
         }
         break;
      }

      /* If this is a copy, add it to the ACP. */
      if (inst->op == TGSI_OPCODE_MOV &&
          inst->dst[0].file == PROGRAM_TEMPORARY &&
          !(inst->dst[0].file == inst->src[0].file &&
            inst->dst[0].index == inst->src[0].index) &&
          !inst->dst[0].reladdr &&
          !inst->dst[0].reladdr2 &&
          !inst->saturate &&
          inst->src[0].file != PROGRAM_ARRAY &&
          !inst->src[0].reladdr &&
          !inst->src[0].reladdr2 &&
          !inst->src[0].negate) {
         for (int i = 0; i < 4; i++) {
            if (inst->dst[0].writemask & (1 << i)) {
               acp[4 * inst->dst[0].index + i] = inst;
               acp_level[4 * inst->dst[0].index + i] = level;
            }
         }
      }
   }

   ralloc_free(acp_level);
   ralloc_free(acp);
}
4016
/*
 * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead
 * code elimination.
 *
 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
 * will occur.  As an example, a TXP production after copy propagation but
 * before this pass:
 *
 * 0: MOV TEMP[1], INPUT[4].xyyy;
 * 1: MOV TEMP[1].w, INPUT[4].wwww;
 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
 *
 * and after this pass:
 *
 * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
 *
 * Returns the number of instructions removed.
 */
int
glsl_to_tgsi_visitor::eliminate_dead_code(void)
{
   /* writes[4 * temp + chan] is the most recent instruction that wrote the
    * channel without a subsequent read; write_level is the if-nesting
    * level of that write.
    */
   glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
                                                     glsl_to_tgsi_instruction *,
                                                     this->next_temp * 4);
   int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
   int level = 0;
   int removed = 0;

   foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
      assert(inst->dst[0].file != PROGRAM_TEMPORARY
             || inst->dst[0].index < this->next_temp);

      switch (inst->op) {
      case TGSI_OPCODE_BGNLOOP:
      case TGSI_OPCODE_ENDLOOP:
      case TGSI_OPCODE_CONT:
      case TGSI_OPCODE_BRK:
         /* End of a basic block, clear the write array entirely.
          *
          * This keeps us from killing dead code when the writes are
          * on either side of a loop, even when the register isn't touched
          * inside the loop.  However, glsl_to_tgsi_visitor doesn't seem to emit
          * dead code of this type, so it shouldn't make a difference as long as
          * the dead code elimination pass in the GLSL compiler does its job.
          */
         memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
         break;

      case TGSI_OPCODE_ENDIF:
      case TGSI_OPCODE_ELSE:
         /* Promote the recorded level of all channels written inside the
          * preceding if or else block to the level above the if/else block.
          */
         for (int r = 0; r < this->next_temp; r++) {
            for (int c = 0; c < 4; c++) {
               if (!writes[4 * r + c])
                  continue;

               if (write_level[4 * r + c] == level)
                  write_level[4 * r + c] = level-1;
            }
         }
         if(inst->op == TGSI_OPCODE_ENDIF)
            --level;
         break;

      case TGSI_OPCODE_IF:
      case TGSI_OPCODE_UIF:
         ++level;
         /* fallthrough to default case to mark the condition as read */
      default:
         /* Continuing the block, clear any channels from the write array that
          * are read by this instruction.
          */
         for (unsigned i = 0; i < ARRAY_SIZE(inst->src); i++) {
            if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){
               /* Any temporary might be read, so no dead code elimination
                * across this instruction.
                */
               memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
            } else if (inst->src[i].file == PROGRAM_TEMPORARY) {
               /* Clear where it's used as src. */
               int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0);
               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1);
               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2);
               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3);

               for (int c = 0; c < 4; c++) {
                  if (src_chans & (1 << c))
                     writes[4 * inst->src[i].index + c] = NULL;
               }
            }
         }
         /* Texture offsets count as reads too. */
         for (unsigned i = 0; i < inst->tex_offset_num_offset; i++) {
            if (inst->tex_offsets[i].file == PROGRAM_TEMPORARY && inst->tex_offsets[i].reladdr){
               /* Any temporary might be read, so no dead code elimination
                * across this instruction.
                */
               memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
            } else if (inst->tex_offsets[i].file == PROGRAM_TEMPORARY) {
               /* Clear where it's used as src. */
               int src_chans = 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 0);
               src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 1);
               src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 2);
               src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 3);

               for (int c = 0; c < 4; c++) {
                  if (src_chans & (1 << c))
                     writes[4 * inst->tex_offsets[i].index + c] = NULL;
               }
            }
         }
         break;
      }

      /* If this instruction writes to a temporary, add it to the write array.
       * If there is already an instruction in the write array for one or more
       * of the channels, flag that channel write as dead.
       */
      for (unsigned i = 0; i < ARRAY_SIZE(inst->dst); i++) {
         if (inst->dst[i].file == PROGRAM_TEMPORARY &&
             !inst->dst[i].reladdr &&
             !inst->saturate) {
            for (int c = 0; c < 4; c++) {
               if (inst->dst[i].writemask & (1 << c)) {
                  if (writes[4 * inst->dst[i].index + c]) {
                     /* A write at a deeper level may still be live on the
                      * other branch, so only kill same-or-shallower ones. */
                     if (write_level[4 * inst->dst[i].index + c] < level)
                        continue;
                     else
                        writes[4 * inst->dst[i].index + c]->dead_mask |= (1 << c);
                  }
                  writes[4 * inst->dst[i].index + c] = inst;
                  write_level[4 * inst->dst[i].index + c] = level;
               }
            }
         }
      }
   }

   /* Anything still in the write array at this point is dead code. */
   for (int r = 0; r < this->next_temp; r++) {
      for (int c = 0; c < 4; c++) {
         glsl_to_tgsi_instruction *inst = writes[4 * r + c];
         if (inst)
            inst->dead_mask |= (1 << c);
      }
   }

   /* Now actually remove the instructions that are completely dead and update
    * the writemask of other instructions with dead channels.
    */
   foreach_in_list_safe(glsl_to_tgsi_instruction, inst, &this->instructions) {
      if (!inst->dead_mask || !inst->dst[0].writemask)
         continue;
      else if ((inst->dst[0].writemask & ~inst->dead_mask) == 0) {
         inst->remove();
         delete inst;
         removed++;
      } else {
         /* Doubles occupy channel pairs; only strip a full XY or ZW pair. */
         if (inst->dst[0].type == GLSL_TYPE_DOUBLE) {
            if (inst->dead_mask == WRITEMASK_XY ||
                inst->dead_mask == WRITEMASK_ZW)
               inst->dst[0].writemask &= ~(inst->dead_mask);
         } else
            inst->dst[0].writemask &= ~(inst->dead_mask);
      }
   }

   ralloc_free(write_level);
   ralloc_free(writes);

   return removed;
}
4188
4189 /* merge DFRACEXP instructions into one. */
4190 void
4191 glsl_to_tgsi_visitor::merge_two_dsts(void)
4192 {
4193 foreach_in_list_safe(glsl_to_tgsi_instruction, inst, &this->instructions) {
4194 glsl_to_tgsi_instruction *inst2;
4195 bool merged;
4196 if (num_inst_dst_regs(inst) != 2)
4197 continue;
4198
4199 if (inst->dst[0].file != PROGRAM_UNDEFINED &&
4200 inst->dst[1].file != PROGRAM_UNDEFINED)
4201 continue;
4202
4203 inst2 = (glsl_to_tgsi_instruction *) inst->next;
4204 do {
4205
4206 if (inst->src[0].file == inst2->src[0].file &&
4207 inst->src[0].index == inst2->src[0].index &&
4208 inst->src[0].type == inst2->src[0].type &&
4209 inst->src[0].swizzle == inst2->src[0].swizzle)
4210 break;
4211 inst2 = (glsl_to_tgsi_instruction *) inst2->next;
4212 } while (inst2);
4213
4214 if (!inst2)
4215 continue;
4216 merged = false;
4217 if (inst->dst[0].file == PROGRAM_UNDEFINED) {
4218 merged = true;
4219 inst->dst[0] = inst2->dst[0];
4220 } else if (inst->dst[1].file == PROGRAM_UNDEFINED) {
4221 inst->dst[1] = inst2->dst[1];
4222 merged = true;
4223 }
4224
4225 if (merged) {
4226 inst2->remove();
4227 delete inst2;
4228 }
4229 }
4230 }
4231
4232 /* Merges temporary registers together where possible to reduce the number of
4233 * registers needed to run a program.
4234 *
4235 * Produces optimal code only after copy propagation and dead code elimination
4236 * have been run. */
4237 void
4238 glsl_to_tgsi_visitor::merge_registers(void)
4239 {
4240 int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp);
4241 int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp);
4242 int i, j;
4243
4244 /* Read the indices of the last read and first write to each temp register
4245 * into an array so that we don't have to traverse the instruction list as
4246 * much. */
4247 for (i = 0; i < this->next_temp; i++) {
4248 last_reads[i] = -1;
4249 first_writes[i] = -1;
4250 }
4251 get_last_temp_read_first_temp_write(last_reads, first_writes);
4252
4253 /* Start looking for registers with non-overlapping usages that can be
4254 * merged together. */
4255 for (i = 0; i < this->next_temp; i++) {
4256 /* Don't touch unused registers. */
4257 if (last_reads[i] < 0 || first_writes[i] < 0) continue;
4258
4259 for (j = 0; j < this->next_temp; j++) {
4260 /* Don't touch unused registers. */
4261 if (last_reads[j] < 0 || first_writes[j] < 0) continue;
4262
4263 /* We can merge the two registers if the first write to j is after or
4264 * in the same instruction as the last read from i. Note that the
4265 * register at index i will always be used earlier or at the same time
4266 * as the register at index j. */
4267 if (first_writes[i] <= first_writes[j] &&
4268 last_reads[i] <= first_writes[j]) {
4269 rename_temp_register(j, i); /* Replace all references to j with i.*/
4270
4271 /* Update the first_writes and last_reads arrays with the new
4272 * values for the merged register index, and mark the newly unused
4273 * register index as such. */
4274 last_reads[i] = last_reads[j];
4275 first_writes[j] = -1;
4276 last_reads[j] = -1;
4277 }
4278 }
4279 }
4280
4281 ralloc_free(last_reads);
4282 ralloc_free(first_writes);
4283 }
4284
4285 /* Reassign indices to temporary registers by reusing unused indices created
4286 * by optimization passes. */
4287 void
4288 glsl_to_tgsi_visitor::renumber_registers(void)
4289 {
4290 int i = 0;
4291 int new_index = 0;
4292 int *first_reads = rzalloc_array(mem_ctx, int, this->next_temp);
4293
4294 for (i = 0; i < this->next_temp; i++)
4295 first_reads[i] = -1;
4296 get_first_temp_read(first_reads);
4297
4298 for (i = 0; i < this->next_temp; i++) {
4299 if (first_reads[i] < 0) continue;
4300 if (i != new_index)
4301 rename_temp_register(i, new_index);
4302 new_index++;
4303 }
4304
4305 this->next_temp = new_index;
4306 ralloc_free(first_reads);
4307 }
4308
/**
 * Returns a fragment program which implements the current pixel transfer ops.
 * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c.
 *
 * The new program samples the source texture, optionally applies scale+bias
 * and pixel-map lookups, then replays the original program's instructions
 * with reads of COL0 redirected to the transformed color temp.
 *
 * \param fp              destination program; its glsl_to_tgsi visitor is replaced
 * \param original        visitor of the user's fragment program being wrapped
 * \param scale_and_bias  non-zero to emit the scale+bias MAD
 * \param pixel_maps      non-zero to emit the pixel-map table lookups
 */
extern "C" void
get_pixel_transfer_visitor(struct st_fragment_program *fp,
                           glsl_to_tgsi_visitor *original,
                           int scale_and_bias, int pixel_maps)
{
   glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
   struct st_context *st = st_context(original->ctx);
   struct gl_program *prog = &fp->Base.Base;
   struct gl_program_parameter_list *params = _mesa_new_parameter_list();
   st_src_reg coord, src0;
   st_dst_reg dst0;
   glsl_to_tgsi_instruction *inst;

   /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
   v->ctx = original->ctx;
   v->prog = prog;
   v->shader_program = NULL;
   v->shader = NULL;
   v->glsl_version = original->glsl_version;
   v->native_integers = original->native_integers;
   v->options = original->options;
   v->next_temp = original->next_temp;
   v->num_address_regs = original->num_address_regs;
   v->samplers_used = prog->SamplersUsed = original->samplers_used;
   v->indirect_addr_consts = original->indirect_addr_consts;
   memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
   v->num_immediates = original->num_immediates;

   /*
    * Get initial pixel color from the texture.
    * TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
    */
   coord = st_src_reg(PROGRAM_INPUT, VARYING_SLOT_TEX0, glsl_type::vec2_type);
   src0 = v->get_temp(glsl_type::vec4_type);
   dst0 = st_dst_reg(src0);
   inst = v->emit_asm(NULL, TGSI_OPCODE_TEX, dst0, coord);
   inst->sampler_array_size = 1;
   inst->tex_target = TEXTURE_2D_INDEX;

   prog->InputsRead |= VARYING_BIT_TEX0;
   prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */
   v->samplers_used |= (1 << 0);

   if (scale_and_bias) {
      static const gl_state_index scale_state[STATE_LENGTH] =
         { STATE_INTERNAL, STATE_PT_SCALE,
           (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
      static const gl_state_index bias_state[STATE_LENGTH] =
         { STATE_INTERNAL, STATE_PT_BIAS,
           (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
      GLint scale_p, bias_p;
      st_src_reg scale, bias;

      /* Fetch scale/bias from driver-internal state vars so they track
       * glPixelTransfer changes without recompiling. */
      scale_p = _mesa_add_state_reference(params, scale_state);
      bias_p = _mesa_add_state_reference(params, bias_state);

      /* MAD colorTemp, colorTemp, scale, bias; */
      scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT);
      bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT);
      inst = v->emit_asm(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias);
   }

   if (pixel_maps) {
      st_src_reg temp = v->get_temp(glsl_type::vec4_type);
      st_dst_reg temp_dst = st_dst_reg(temp);

      assert(st->pixel_xfer.pixelmap_texture);
      (void) st;

      /* With a little effort, we can do four pixel map look-ups with
       * two TEX instructions:
       */

      /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */
      temp_dst.writemask = WRITEMASK_XY; /* write R,G */
      inst = v->emit_asm(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
      inst->sampler.index = 1;
      inst->sampler_array_size = 1;
      inst->tex_target = TEXTURE_2D_INDEX;

      /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */
      src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
      temp_dst.writemask = WRITEMASK_ZW; /* write B,A */
      inst = v->emit_asm(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
      inst->sampler.index = 1;
      inst->sampler_array_size = 1;
      inst->tex_target = TEXTURE_2D_INDEX;

      prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */
      v->samplers_used |= (1 << 1);

      /* MOV colorTemp, temp; */
      inst = v->emit_asm(NULL, TGSI_OPCODE_MOV, dst0, temp);
   }

   /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
    * new visitor. */
   foreach_in_list(glsl_to_tgsi_instruction, inst, &original->instructions) {
      glsl_to_tgsi_instruction *newinst;
      st_src_reg src_regs[4];

      if (inst->dst[0].file == PROGRAM_OUTPUT)
         prog->OutputsWritten |= BITFIELD64_BIT(inst->dst[0].index);

      for (int i = 0; i < 4; i++) {
         src_regs[i] = inst->src[i];
         /* Redirect reads of the color input to the temp holding the
          * (possibly transformed) texture color. */
         if (src_regs[i].file == PROGRAM_INPUT &&
             src_regs[i].index == VARYING_SLOT_COL0) {
            src_regs[i].file = PROGRAM_TEMPORARY;
            src_regs[i].index = src0.index;
         }
         else if (src_regs[i].file == PROGRAM_INPUT)
            prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
      }

      newinst = v->emit_asm(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2], src_regs[3]);
      newinst->tex_target = inst->tex_target;
      newinst->sampler_array_size = inst->sampler_array_size;
   }

   /* Make modifications to fragment program info. */
   prog->Parameters = _mesa_combine_parameter_lists(params,
                                                    original->prog->Parameters);
   _mesa_free_parameter_list(params);
   count_resources(v, prog);
   fp->glsl_to_tgsi = v;
}
4440
/**
 * Make fragment program for glBitmap:
 * Sample the texture and kill the fragment if the bit is 0.
 * This program will be combined with the user's fragment program.
 *
 * Based on make_bitmap_fragment_program in st_cb_bitmap.c.
 *
 * \param fp            destination program; its glsl_to_tgsi visitor is replaced
 * \param original      visitor of the user's fragment program being wrapped
 * \param samplerIndex  sampler unit holding the bitmap texture
 */
extern "C" void
get_bitmap_visitor(struct st_fragment_program *fp,
                   glsl_to_tgsi_visitor *original, int samplerIndex)
{
   glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
   struct st_context *st = st_context(original->ctx);
   struct gl_program *prog = &fp->Base.Base;
   st_src_reg coord, src0;
   st_dst_reg dst0;
   glsl_to_tgsi_instruction *inst;

   /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
   v->ctx = original->ctx;
   v->prog = prog;
   v->shader_program = NULL;
   v->shader = NULL;
   v->glsl_version = original->glsl_version;
   v->native_integers = original->native_integers;
   v->options = original->options;
   v->next_temp = original->next_temp;
   v->num_address_regs = original->num_address_regs;
   v->samplers_used = prog->SamplersUsed = original->samplers_used;
   v->indirect_addr_consts = original->indirect_addr_consts;
   memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
   v->num_immediates = original->num_immediates;

   /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
   coord = st_src_reg(PROGRAM_INPUT, VARYING_SLOT_TEX0, glsl_type::vec2_type);
   src0 = v->get_temp(glsl_type::vec4_type);
   dst0 = st_dst_reg(src0);
   inst = v->emit_asm(NULL, TGSI_OPCODE_TEX, dst0, coord);
   inst->sampler.index = samplerIndex;
   inst->sampler_array_size = 1;
   inst->tex_target = TEXTURE_2D_INDEX;

   prog->InputsRead |= VARYING_BIT_TEX0;
   prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */
   v->samplers_used |= (1 << samplerIndex);

   /* KIL if -tmp0 < 0, i.e. kill when texel > 0:
    * texel==0 -> keep / texel!=0 -> discard
    * (the bitmap texture stores an inverted mask — see st_cb_bitmap.c)
    */
   src0.negate = NEGATE_XYZW;
   if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
      src0.swizzle = SWIZZLE_XXXX;
   inst = v->emit_asm(NULL, TGSI_OPCODE_KILL_IF, undef_dst, src0);

   /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
    * new visitor. */
   foreach_in_list(glsl_to_tgsi_instruction, inst, &original->instructions) {
      glsl_to_tgsi_instruction *newinst;
      st_src_reg src_regs[4];

      if (inst->dst[0].file == PROGRAM_OUTPUT)
         prog->OutputsWritten |= BITFIELD64_BIT(inst->dst[0].index);

      for (int i = 0; i < 4; i++) {
         src_regs[i] = inst->src[i];
         if (src_regs[i].file == PROGRAM_INPUT)
            prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
      }

      newinst = v->emit_asm(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2], src_regs[3]);
      newinst->tex_target = inst->tex_target;
      newinst->sampler_array_size = inst->sampler_array_size;
   }

   /* Make modifications to fragment program info. */
   prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters);
   count_resources(v, prog);
   fp->glsl_to_tgsi = v;
}
4518
/* ------------------------- TGSI conversion stuff -------------------------- */
/**
 * A pending branch whose TGSI target token must be patched once all
 * instructions have been emitted (see get_label()).
 */
struct label {
   unsigned branch_target; /**< label id branched to (e.g. sig_id for CAL) */
   unsigned token;         /**< token slot to patch with the real target */
};
4524
/**
 * Intermediate state used during shader translation.
 */
struct st_translate {
   struct ureg_program *ureg;

   unsigned temps_size;          /**< allocated length of temps[] */
   struct ureg_dst *temps;       /**< TGSI temporaries, grown on demand */

   struct ureg_dst *arrays;      /**< declared temporary arrays */
   unsigned num_temp_arrays;
   struct ureg_src *constants;   /**< constant-buffer entries */
   int num_constants;
   struct ureg_src *immediates;  /**< declared immediate values */
   int num_immediates;
   struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
   struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
   /* ADDR regs: [0] general indirect, [1] 2D/dimension indirect,
    * [2] sampler indirect (see translate_dst/translate_src and
    * compile_tgsi_instruction). */
   struct ureg_dst address[3];
   struct ureg_src samplers[PIPE_MAX_SAMPLERS];
   struct ureg_src systemValues[SYSTEM_VALUE_MAX];
   struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
   unsigned *array_sizes;        /**< element count per temporary array */
   struct array_decl *input_arrays;
   struct array_decl *output_arrays;

   const GLuint *inputMapping;   /**< Mesa input slot -> TGSI input index */
   const GLuint *outputMapping;  /**< Mesa output slot -> TGSI output index */

   /* For every instruction that contains a label (eg CALL), keep
    * details so that we can go back afterwards and emit the correct
    * tgsi instruction number for each label.
    */
   struct label *labels;
   unsigned labels_size;         /**< allocated length of labels[] */
   unsigned labels_count;        /**< number of labels recorded */

   /* Keep a record of the tgsi instruction number that each mesa
    * instruction starts at, will be used to fix up labels after
    * translation.
    */
   unsigned *insn;
   unsigned insn_size;           /**< allocated length of insn[] */
   unsigned insn_count;          /**< number of entries used */

   unsigned procType;  /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */

   boolean error;      /**< set on allocation failure during translation */
};
4573
/** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x.
 *
 * Indexed directly by SYSTEM_VALUE_x (see src_register()), so the entry
 * order must match the gl_system_value enum.
 */
const unsigned _mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
   /* Vertex shader
    */
   TGSI_SEMANTIC_VERTEXID,
   TGSI_SEMANTIC_INSTANCEID,
   TGSI_SEMANTIC_VERTEXID_NOBASE,
   TGSI_SEMANTIC_BASEVERTEX,

   /* Geometry shader
    */
   TGSI_SEMANTIC_INVOCATIONID,

   /* Fragment shader
    */
   TGSI_SEMANTIC_FACE,
   TGSI_SEMANTIC_SAMPLEID,
   TGSI_SEMANTIC_SAMPLEPOS,
   TGSI_SEMANTIC_SAMPLEMASK,

   /* Tessellation shaders
    */
   TGSI_SEMANTIC_TESSCOORD,
   TGSI_SEMANTIC_VERTICESIN,
   TGSI_SEMANTIC_PRIMID,
   TGSI_SEMANTIC_TESSOUTER,
   TGSI_SEMANTIC_TESSINNER,
};
4602
4603 /**
4604 * Make note of a branch to a label in the TGSI code.
4605 * After we've emitted all instructions, we'll go over the list
4606 * of labels built here and patch the TGSI code with the actual
4607 * location of each label.
4608 */
4609 static unsigned *get_label(struct st_translate *t, unsigned branch_target)
4610 {
4611 unsigned i;
4612
4613 if (t->labels_count + 1 >= t->labels_size) {
4614 t->labels_size = 1 << (util_logbase2(t->labels_size) + 1);
4615 t->labels = (struct label *)realloc(t->labels,
4616 t->labels_size * sizeof(struct label));
4617 if (t->labels == NULL) {
4618 static unsigned dummy;
4619 t->error = TRUE;
4620 return &dummy;
4621 }
4622 }
4623
4624 i = t->labels_count++;
4625 t->labels[i].branch_target = branch_target;
4626 return &t->labels[i].token;
4627 }
4628
4629 /**
4630 * Called prior to emitting the TGSI code for each instruction.
4631 * Allocate additional space for instructions if needed.
4632 * Update the insn[] array so the next glsl_to_tgsi_instruction points to
4633 * the next TGSI instruction.
4634 */
4635 static void set_insn_start(struct st_translate *t, unsigned start)
4636 {
4637 if (t->insn_count + 1 >= t->insn_size) {
4638 t->insn_size = 1 << (util_logbase2(t->insn_size) + 1);
4639 t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0]));
4640 if (t->insn == NULL) {
4641 t->error = TRUE;
4642 return;
4643 }
4644 }
4645
4646 t->insn[t->insn_count++] = start;
4647 }
4648
4649 /**
4650 * Map a glsl_to_tgsi constant/immediate to a TGSI immediate.
4651 */
4652 static struct ureg_src
4653 emit_immediate(struct st_translate *t,
4654 gl_constant_value values[4],
4655 int type, int size)
4656 {
4657 struct ureg_program *ureg = t->ureg;
4658
4659 switch(type)
4660 {
4661 case GL_FLOAT:
4662 return ureg_DECL_immediate(ureg, &values[0].f, size);
4663 case GL_DOUBLE:
4664 return ureg_DECL_immediate_f64(ureg, (double *)&values[0].f, size);
4665 case GL_INT:
4666 return ureg_DECL_immediate_int(ureg, &values[0].i, size);
4667 case GL_UNSIGNED_INT:
4668 case GL_BOOL:
4669 return ureg_DECL_immediate_uint(ureg, &values[0].u, size);
4670 default:
4671 assert(!"should not get here - type must be float, int, uint, or bool");
4672 return ureg_src_undef();
4673 }
4674 }
4675
4676 /**
4677 * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register.
4678 */
4679 static struct ureg_dst
4680 dst_register(struct st_translate *t, gl_register_file file, unsigned index,
4681 unsigned array_id)
4682 {
4683 unsigned array;
4684
4685 switch(file) {
4686 case PROGRAM_UNDEFINED:
4687 return ureg_dst_undef();
4688
4689 case PROGRAM_TEMPORARY:
4690 /* Allocate space for temporaries on demand. */
4691 if (index >= t->temps_size) {
4692 const int inc = 4096;
4693
4694 t->temps = (struct ureg_dst*)
4695 realloc(t->temps,
4696 (t->temps_size + inc) * sizeof(struct ureg_dst));
4697 if (!t->temps)
4698 return ureg_dst_undef();
4699
4700 memset(t->temps + t->temps_size, 0, inc * sizeof(struct ureg_dst));
4701 t->temps_size += inc;
4702 }
4703
4704 if (ureg_dst_is_undef(t->temps[index]))
4705 t->temps[index] = ureg_DECL_local_temporary(t->ureg);
4706
4707 return t->temps[index];
4708
4709 case PROGRAM_ARRAY:
4710 array = index >> 16;
4711
4712 assert(array < t->num_temp_arrays);
4713
4714 if (ureg_dst_is_undef(t->arrays[array]))
4715 t->arrays[array] = ureg_DECL_array_temporary(
4716 t->ureg, t->array_sizes[array], TRUE);
4717
4718 return ureg_dst_array_offset(t->arrays[array],
4719 (int)(index & 0xFFFF) - 0x8000);
4720
4721 case PROGRAM_OUTPUT:
4722 if (!array_id) {
4723 if (t->procType == TGSI_PROCESSOR_FRAGMENT)
4724 assert(index < FRAG_RESULT_MAX);
4725 else if (t->procType == TGSI_PROCESSOR_TESS_CTRL ||
4726 t->procType == TGSI_PROCESSOR_TESS_EVAL)
4727 assert(index < VARYING_SLOT_TESS_MAX);
4728 else
4729 assert(index < VARYING_SLOT_MAX);
4730
4731 assert(t->outputMapping[index] < ARRAY_SIZE(t->outputs));
4732 assert(t->outputs[t->outputMapping[index]].File != TGSI_FILE_NULL);
4733 return t->outputs[t->outputMapping[index]];
4734 }
4735 else {
4736 struct array_decl *decl = &t->output_arrays[array_id-1];
4737 unsigned mesa_index = decl->mesa_index;
4738 int slot = t->outputMapping[mesa_index];
4739
4740 assert(slot != -1 && t->outputs[slot].File == TGSI_FILE_OUTPUT);
4741 assert(t->outputs[slot].ArrayID == array_id);
4742 return ureg_dst_array_offset(t->outputs[slot], index - mesa_index);
4743 }
4744
4745 case PROGRAM_ADDRESS:
4746 return t->address[index];
4747
4748 default:
4749 assert(!"unknown dst register file");
4750 return ureg_dst_undef();
4751 }
4752 }
4753
/**
 * Map a glsl_to_tgsi src register to a TGSI ureg_src register.
 */
static struct ureg_src
src_register(struct st_translate *t, const st_src_reg *reg)
{
   int index = reg->index;
   /* 1 when this is the second half of a double slot (PROGRAM_INPUT only). */
   int double_reg2 = reg->double_reg2 ? 1 : 0;

   switch(reg->file) {
   case PROGRAM_UNDEFINED:
      return ureg_imm4f(t->ureg, 0, 0, 0, 0);

   case PROGRAM_TEMPORARY:
   case PROGRAM_ARRAY:
   case PROGRAM_OUTPUT:
      /* Reuse the dst-side declaration/lookup logic for these files. */
      return ureg_src(dst_register(t, reg->file, reg->index, reg->array_id));

   case PROGRAM_UNIFORM:
      assert(reg->index >= 0);
      /* Out-of-range constant indices read as zero. */
      return reg->index < t->num_constants ?
               t->constants[reg->index] : ureg_imm4f(t->ureg, 0, 0, 0, 0);
   case PROGRAM_STATE_VAR:
   case PROGRAM_CONSTANT:       /* ie, immediate */
      /* has_index2 marks a 2D constant-buffer reference; the dimension is
       * appended later by translate_src(). */
      if (reg->has_index2)
         return ureg_src_register(TGSI_FILE_CONSTANT, reg->index);
      else
         return reg->index >= 0 && reg->index < t->num_constants ?
                  t->constants[reg->index] : ureg_imm4f(t->ureg, 0, 0, 0, 0);

   case PROGRAM_IMMEDIATE:
      assert(reg->index >= 0 && reg->index < t->num_immediates);
      return t->immediates[reg->index];

   case PROGRAM_INPUT:
      /* GLSL inputs are 64-bit containers, so we have to
       * map back to the original index and add the offset after
       * mapping. */
      index -= double_reg2;
      if (!reg->array_id) {
         assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs));
         assert(t->inputs[t->inputMapping[index]].File != TGSI_FILE_NULL);
         return t->inputs[t->inputMapping[index]];
      }
      else {
         /* Input array: address relative to the array's first element. */
         struct array_decl *decl = &t->input_arrays[reg->array_id-1];
         unsigned mesa_index = decl->mesa_index;
         int slot = t->inputMapping[mesa_index];

         assert(slot != -1 && t->inputs[slot].File == TGSI_FILE_INPUT);
         assert(t->inputs[slot].ArrayID == reg->array_id);
         return ureg_src_array_offset(t->inputs[slot], index - mesa_index);
      }

   case PROGRAM_ADDRESS:
      return ureg_src(t->address[reg->index]);

   case PROGRAM_SYSTEM_VALUE:
      assert(reg->index < (int) ARRAY_SIZE(t->systemValues));
      return t->systemValues[reg->index];

   default:
      assert(!"unknown src register file");
      return ureg_src_undef();
   }
}
4820
/**
 * Create a TGSI ureg_dst register from an st_dst_reg.
 *
 * \param dst_reg      glsl_to_tgsi destination register to translate
 * \param saturate     clamp the result to [0,1]
 * \param clamp_color  clamp color outputs (ARB_color_buffer_float)
 */
static struct ureg_dst
translate_dst(struct st_translate *t,
              const st_dst_reg *dst_reg,
              bool saturate, bool clamp_color)
{
   struct ureg_dst dst = dst_register(t, dst_reg->file, dst_reg->index,
                                      dst_reg->array_id);

   if (dst.File == TGSI_FILE_NULL)
      return dst;

   dst = ureg_writemask(dst, dst_reg->writemask);

   if (saturate)
      dst = ureg_saturate(dst);
   else if (clamp_color && dst_reg->file == PROGRAM_OUTPUT) {
      /* Clamp colors for ARB_color_buffer_float. */
      switch (t->procType) {
      case TGSI_PROCESSOR_VERTEX:
         /* This can only occur with a compatibility profile, which doesn't
          * support geometry shaders. */
         if (dst_reg->index == VARYING_SLOT_COL0 ||
             dst_reg->index == VARYING_SLOT_COL1 ||
             dst_reg->index == VARYING_SLOT_BFC0 ||
             dst_reg->index == VARYING_SLOT_BFC1) {
            dst = ureg_saturate(dst);
         }
         break;

      case TGSI_PROCESSOR_FRAGMENT:
         if (dst_reg->index == FRAG_RESULT_COLOR ||
             dst_reg->index >= FRAG_RESULT_DATA0) {
            dst = ureg_saturate(dst);
         }
         break;
      }
   }

   /* Indirect writes go through ADDR[0]; temporaries are never written
    * indirectly. */
   if (dst_reg->reladdr != NULL) {
      assert(dst_reg->file != PROGRAM_TEMPORARY);
      dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
   }

   /* Second dimension of the destination, optionally relative through
    * ADDR[1]. */
   if (dst_reg->has_index2) {
      if (dst_reg->reladdr2)
         dst = ureg_dst_dimension_indirect(dst, ureg_src(t->address[1]),
                                           dst_reg->index2D);
      else
         dst = ureg_dst_dimension(dst, dst_reg->index2D);
   }

   return dst;
}
4877
4878 /**
4879 * Create a TGSI ureg_src register from an st_src_reg.
4880 */
4881 static struct ureg_src
4882 translate_src(struct st_translate *t, const st_src_reg *src_reg)
4883 {
4884 struct ureg_src src = src_register(t, src_reg);
4885
4886 if (src_reg->has_index2) {
4887 /* 2D indexes occur with geometry shader inputs (attrib, vertex)
4888 * and UBO constant buffers (buffer, position).
4889 */
4890 if (src_reg->reladdr2)
4891 src = ureg_src_dimension_indirect(src, ureg_src(t->address[1]),
4892 src_reg->index2D);
4893 else
4894 src = ureg_src_dimension(src, src_reg->index2D);
4895 }
4896
4897 src = ureg_swizzle(src,
4898 GET_SWZ(src_reg->swizzle, 0) & 0x3,
4899 GET_SWZ(src_reg->swizzle, 1) & 0x3,
4900 GET_SWZ(src_reg->swizzle, 2) & 0x3,
4901 GET_SWZ(src_reg->swizzle, 3) & 0x3);
4902
4903 if ((src_reg->negate & 0xf) == NEGATE_XYZW)
4904 src = ureg_negate(src);
4905
4906 if (src_reg->reladdr != NULL) {
4907 assert(src_reg->file != PROGRAM_TEMPORARY);
4908 src = ureg_src_indirect(src, ureg_src(t->address[0]));
4909 }
4910
4911 return src;
4912 }
4913
4914 static struct tgsi_texture_offset
4915 translate_tex_offset(struct st_translate *t,
4916 const st_src_reg *in_offset, int idx)
4917 {
4918 struct tgsi_texture_offset offset;
4919 struct ureg_src imm_src;
4920 struct ureg_dst dst;
4921 int array;
4922
4923 switch (in_offset->file) {
4924 case PROGRAM_IMMEDIATE:
4925 assert(in_offset->index >= 0 && in_offset->index < t->num_immediates);
4926 imm_src = t->immediates[in_offset->index];
4927
4928 offset.File = imm_src.File;
4929 offset.Index = imm_src.Index;
4930 offset.SwizzleX = imm_src.SwizzleX;
4931 offset.SwizzleY = imm_src.SwizzleY;
4932 offset.SwizzleZ = imm_src.SwizzleZ;
4933 offset.Padding = 0;
4934 break;
4935 case PROGRAM_TEMPORARY:
4936 imm_src = ureg_src(t->temps[in_offset->index]);
4937 offset.File = imm_src.File;
4938 offset.Index = imm_src.Index;
4939 offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0);
4940 offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1);
4941 offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2);
4942 offset.Padding = 0;
4943 break;
4944 case PROGRAM_ARRAY:
4945 array = in_offset->index >> 16;
4946
4947 assert(array >= 0);
4948 assert(array < (int)t->num_temp_arrays);
4949
4950 dst = t->arrays[array];
4951 offset.File = dst.File;
4952 offset.Index = dst.Index + (in_offset->index & 0xFFFF) - 0x8000;
4953 offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0);
4954 offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1);
4955 offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2);
4956 offset.Padding = 0;
4957 break;
4958 default:
4959 break;
4960 }
4961 return offset;
4962 }
4963
/**
 * Translate one glsl_to_tgsi_instruction into the ureg instruction stream.
 *
 * \param t                       translation context
 * \param inst                    instruction to translate
 * \param clamp_dst_color_output  clamp color outputs (ARB_color_buffer_float)
 */
static void
compile_tgsi_instruction(struct st_translate *t,
                         const glsl_to_tgsi_instruction *inst,
                         bool clamp_dst_color_output)
{
   struct ureg_program *ureg = t->ureg;
   GLuint i;
   struct ureg_dst dst[2];
   struct ureg_src src[4];
   struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET];

   unsigned num_dst;
   unsigned num_src;
   unsigned tex_target;

   num_dst = num_inst_dst_regs(inst);
   num_src = num_inst_src_regs(inst);

   /* Translate all operands up front. */
   for (i = 0; i < num_dst; i++)
      dst[i] = translate_dst(t,
                             &inst->dst[i],
                             inst->saturate,
                             clamp_dst_color_output);

   for (i = 0; i < num_src; i++)
      src[i] = translate_src(t, &inst->src[i]);

   switch(inst->op) {
   case TGSI_OPCODE_BGNLOOP:
   case TGSI_OPCODE_CAL:
   case TGSI_OPCODE_ELSE:
   case TGSI_OPCODE_ENDLOOP:
   case TGSI_OPCODE_IF:
   case TGSI_OPCODE_UIF:
      assert(num_dst == 0);
      /* These carry a label token that gets patched after translation
       * (see get_label()); only CAL has a real target (callee's sig_id). */
      ureg_label_insn(ureg,
                      inst->op,
                      src, num_src,
                      get_label(t,
                                inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0));
      return;

   case TGSI_OPCODE_TEX:
   case TGSI_OPCODE_TXB:
   case TGSI_OPCODE_TXD:
   case TGSI_OPCODE_TXL:
   case TGSI_OPCODE_TXP:
   case TGSI_OPCODE_TXQ:
   case TGSI_OPCODE_TXF:
   case TGSI_OPCODE_TEX2:
   case TGSI_OPCODE_TXB2:
   case TGSI_OPCODE_TXL2:
   case TGSI_OPCODE_TG4:
   case TGSI_OPCODE_LODQ:
      /* Texture opcodes take the sampler as an extra trailing source,
       * indirect through ADDR[2] when sampler.reladdr is set, plus any
       * texel offsets. */
      src[num_src] = t->samplers[inst->sampler.index];
      assert(src[num_src].File != TGSI_FILE_NULL);
      if (inst->sampler.reladdr)
         src[num_src] =
            ureg_src_indirect(src[num_src], ureg_src(t->address[2]));
      num_src++;
      for (i = 0; i < inst->tex_offset_num_offset; i++) {
         texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i], i);
      }
      tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);

      ureg_tex_insn(ureg,
                    inst->op,
                    dst, num_dst,
                    tex_target,
                    texoffsets, inst->tex_offset_num_offset,
                    src, num_src);
      return;

   case TGSI_OPCODE_SCS:
      /* Restrict SCS to the .xy channels it produces here. */
      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY);
      ureg_insn(ureg, inst->op, dst, num_dst, src, num_src);
      break;

   default:
      ureg_insn(ureg,
                inst->op,
                dst, num_dst,
                src, num_src);
      break;
   }
}
5050
/**
 * Emit the TGSI instructions for inverting and adjusting WPOS.
 * This code is unavoidable because it also depends on whether
 * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
 *
 * \param wpos_transform_const  parameter index of the WPOS transform state var
 * \param invert                apply the inversion half (.xy) of the transform
 * \param adjX  constant x adjustment (pixel-center shift)
 * \param adjY  y adjustment for the non-inverted [0] and inverted [1] cases
 */
static void
emit_wpos_adjustment( struct st_translate *t,
                      int wpos_transform_const,
                      boolean invert,
                      GLfloat adjX, GLfloat adjY[2])
{
   struct ureg_program *ureg = t->ureg;

   assert(wpos_transform_const >= 0);

   /* Fragment program uses fragment position input.
    * Need to replace instances of INPUT[WPOS] with temp T
    * where T = INPUT[WPOS] is inverted by Y.
    */
   struct ureg_src wpostrans = ureg_DECL_constant(ureg, wpos_transform_const);
   struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg );
   struct ureg_src wpos_input = t->inputs[t->inputMapping[VARYING_SLOT_POS]];

   /* First, apply the coordinate shift: */
   if (adjX || adjY[0] || adjY[1]) {
      if (adjY[0] != adjY[1]) {
         /* Adjust the y coordinate by adjY[1] or adjY[0] respectively
          * depending on whether inversion is actually going to be applied
          * or not, which is determined by testing against the inversion
          * state variable used below, which will be either +1 or -1.
          */
         struct ureg_dst adj_temp = ureg_DECL_local_temporary(ureg);

         ureg_CMP(ureg, adj_temp,
                  ureg_scalar(wpostrans, invert ? 2 : 0),
                  ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f),
                  ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f));
         ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp));
      } else {
         /* Same shift regardless of inversion — a single ADD suffices. */
         ureg_ADD(ureg, wpos_temp, wpos_input,
                  ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f));
      }
      wpos_input = ureg_src(wpos_temp);
   } else {
      /* MOV wpos_temp, input[wpos]
       */
      ureg_MOV( ureg, wpos_temp, wpos_input );
   }

   /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be
    * inversion/identity, or the other way around if we're drawing to an FBO.
    */
   if (invert) {
      /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
       */
      ureg_MAD( ureg,
                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
                wpos_input,
                ureg_scalar(wpostrans, 0),
                ureg_scalar(wpostrans, 1));
   } else {
      /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
       */
      ureg_MAD( ureg,
                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
                wpos_input,
                ureg_scalar(wpostrans, 2),
                ureg_scalar(wpostrans, 3));
   }

   /* Use wpos_temp as position input from here on:
    */
   t->inputs[t->inputMapping[VARYING_SLOT_POS]] = ureg_src(wpos_temp);
}
5125
5126
5127 /**
5128 * Emit fragment position/ooordinate code.
5129 */
5130 static void
5131 emit_wpos(struct st_context *st,
5132 struct st_translate *t,
5133 const struct gl_program *program,
5134 struct ureg_program *ureg,
5135 int wpos_transform_const)
5136 {
5137 const struct gl_fragment_program *fp =
5138 (const struct gl_fragment_program *) program;
5139 struct pipe_screen *pscreen = st->pipe->screen;
5140 GLfloat adjX = 0.0f;
5141 GLfloat adjY[2] = { 0.0f, 0.0f };
5142 boolean invert = FALSE;
5143
5144 /* Query the pixel center conventions supported by the pipe driver and set
5145 * adjX, adjY to help out if it cannot handle the requested one internally.
5146 *
5147 * The bias of the y-coordinate depends on whether y-inversion takes place
5148 * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are
5149 * drawing to an FBO (causes additional inversion), and whether the the pipe
5150 * driver origin and the requested origin differ (the latter condition is
5151 * stored in the 'invert' variable).
5152 *
5153 * For height = 100 (i = integer, h = half-integer, l = lower, u = upper):
5154 *
5155 * center shift only:
5156 * i -> h: +0.5
5157 * h -> i: -0.5
5158 *
5159 * inversion only:
5160 * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99
5161 * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5
5162 * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0
5163 * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5
5164 *
5165 * inversion and center shift:
5166 * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5
5167 * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99
5168 * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5
5169 * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0
5170 */
5171 if (fp->OriginUpperLeft) {
5172 /* Fragment shader wants origin in upper-left */
5173 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
5174 /* the driver supports upper-left origin */
5175 }
5176 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
5177 /* the driver supports lower-left origin, need to invert Y */
5178 ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN,
5179 TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
5180 invert = TRUE;
5181 }
5182 else
5183 assert(0);
5184 }
5185 else {
5186 /* Fragment shader wants origin in lower-left */
5187 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
5188 /* the driver supports lower-left origin */
5189 ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN,
5190 TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
5191 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
5192 /* the driver supports upper-left origin, need to invert Y */
5193 invert = TRUE;
5194 else
5195 assert(0);
5196 }
5197
5198 if (fp->PixelCenterInteger) {
5199 /* Fragment shader wants pixel center integer */
5200 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
5201 /* the driver supports pixel center integer */
5202 adjY[1] = 1.0f;
5203 ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
5204 TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
5205 }
5206 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
5207 /* the driver supports pixel center half integer, need to bias X,Y */
5208 adjX = -0.5f;
5209 adjY[0] = -0.5f;
5210 adjY[1] = 0.5f;
5211 }
5212 else
5213 assert(0);
5214 }
5215 else {
5216 /* Fragment shader wants pixel center half integer */
5217 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
5218 /* the driver supports pixel center half integer */
5219 }
5220 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
5221 /* the driver supports pixel center integer, need to bias X,Y */
5222 adjX = adjY[0] = adjY[1] = 0.5f;
5223 ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
5224 TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
5225 }
5226 else
5227 assert(0);
5228 }
5229
5230 /* we invert after adjustment so that we avoid the MOV to temporary,
5231 * and reuse the adjustment ADD instead */
5232 emit_wpos_adjustment(t, wpos_transform_const, invert, adjX, adjY);
5233 }
5234
5235 /**
5236 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back.
5237 * TGSI uses +1 for front, -1 for back.
5238 * This function converts the TGSI value to the GL value. Simply clamping/
5239 * saturating the value to [0,1] does the job.
5240 */
5241 static void
5242 emit_face_var(struct gl_context *ctx, struct st_translate *t)
5243 {
5244 struct ureg_program *ureg = t->ureg;
5245 struct ureg_dst face_temp = ureg_DECL_temporary(ureg);
5246 struct ureg_src face_input = t->inputs[t->inputMapping[VARYING_SLOT_FACE]];
5247
5248 if (ctx->Const.NativeIntegers) {
5249 ureg_FSGE(ureg, face_temp, face_input, ureg_imm1f(ureg, 0));
5250 }
5251 else {
5252 /* MOV_SAT face_temp, input[face] */
5253 ureg_MOV(ureg, ureg_saturate(face_temp), face_input);
5254 }
5255
5256 /* Use face_temp as face input from here on: */
5257 t->inputs[t->inputMapping[VARYING_SLOT_FACE]] = ureg_src(face_temp);
5258 }
5259
5260 static void
5261 emit_edgeflags(struct st_translate *t)
5262 {
5263 struct ureg_program *ureg = t->ureg;
5264 struct ureg_dst edge_dst = t->outputs[t->outputMapping[VARYING_SLOT_EDGE]];
5265 struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]];
5266
5267 ureg_MOV(ureg, edge_dst, edge_src);
5268 }
5269
5270 static bool
5271 find_array(unsigned attr, struct array_decl *arrays, unsigned count,
5272 unsigned *array_id, unsigned *array_size)
5273 {
5274 unsigned i;
5275
5276 for (i = 0; i < count; i++) {
5277 struct array_decl *decl = &arrays[i];
5278
5279 if (attr == decl->mesa_index) {
5280 *array_id = decl->array_id;
5281 *array_size = decl->array_size;
5282 assert(*array_size);
5283 return true;
5284 }
5285 }
5286 return false;
5287 }
5288
5289 /**
5290 * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
5291 * \param program the program to translate
5292 * \param numInputs number of input registers used
5293 * \param inputMapping maps Mesa fragment program inputs to TGSI generic
5294 * input indexes
5295 * \param inputSemanticName the TGSI_SEMANTIC flag for each input
5296 * \param inputSemanticIndex the semantic index (ex: which texcoord) for
5297 * each input
5298 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
5299 * \param interpLocation the TGSI_INTERPOLATE_LOC_* location for each input
5300 * \param numOutputs number of output registers used
5301 * \param outputMapping maps Mesa fragment program outputs to TGSI
5302 * generic outputs
5303 * \param outputSemanticName the TGSI_SEMANTIC flag for each output
5304 * \param outputSemanticIndex the semantic index (ex: which texcoord) for
5305 * each output
5306 *
5307 * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
5308 */
5309 extern "C" enum pipe_error
5310 st_translate_program(
5311 struct gl_context *ctx,
5312 uint procType,
5313 struct ureg_program *ureg,
5314 glsl_to_tgsi_visitor *program,
5315 const struct gl_program *proginfo,
5316 GLuint numInputs,
5317 const GLuint inputMapping[],
5318 const GLuint inputSlotToAttr[],
5319 const ubyte inputSemanticName[],
5320 const ubyte inputSemanticIndex[],
5321 const GLuint interpMode[],
5322 const GLuint interpLocation[],
5323 GLuint numOutputs,
5324 const GLuint outputMapping[],
5325 const GLuint outputSlotToAttr[],
5326 const ubyte outputSemanticName[],
5327 const ubyte outputSemanticIndex[],
5328 boolean passthrough_edgeflags,
5329 boolean clamp_color)
5330 {
5331 struct st_translate *t;
5332 unsigned i;
5333 enum pipe_error ret = PIPE_OK;
5334
5335 assert(numInputs <= ARRAY_SIZE(t->inputs));
5336 assert(numOutputs <= ARRAY_SIZE(t->outputs));
5337
5338 assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_FRONT_FACE] ==
5339 TGSI_SEMANTIC_FACE);
5340 assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_VERTEX_ID] ==
5341 TGSI_SEMANTIC_VERTEXID);
5342 assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_INSTANCE_ID] ==
5343 TGSI_SEMANTIC_INSTANCEID);
5344 assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_SAMPLE_ID] ==
5345 TGSI_SEMANTIC_SAMPLEID);
5346 assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_SAMPLE_POS] ==
5347 TGSI_SEMANTIC_SAMPLEPOS);
5348 assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_SAMPLE_MASK_IN] ==
5349 TGSI_SEMANTIC_SAMPLEMASK);
5350 assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_INVOCATION_ID] ==
5351 TGSI_SEMANTIC_INVOCATIONID);
5352 assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE] ==
5353 TGSI_SEMANTIC_VERTEXID_NOBASE);
5354 assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_BASE_VERTEX] ==
5355 TGSI_SEMANTIC_BASEVERTEX);
5356 assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_TESS_COORD] ==
5357 TGSI_SEMANTIC_TESSCOORD);
5358
5359 t = CALLOC_STRUCT(st_translate);
5360 if (!t) {
5361 ret = PIPE_ERROR_OUT_OF_MEMORY;
5362 goto out;
5363 }
5364
5365 t->procType = procType;
5366 t->inputMapping = inputMapping;
5367 t->outputMapping = outputMapping;
5368 t->ureg = ureg;
5369 t->num_temp_arrays = program->next_array;
5370 if (t->num_temp_arrays)
5371 t->arrays = (struct ureg_dst*)
5372 calloc(1, sizeof(t->arrays[0]) * t->num_temp_arrays);
5373
5374 /*
5375 * Declare input attributes.
5376 */
5377 switch (procType) {
5378 case TGSI_PROCESSOR_FRAGMENT:
5379 for (i = 0; i < numInputs; i++) {
5380 unsigned array_id = 0;
5381 unsigned array_size;
5382
5383 if (find_array(inputSlotToAttr[i], program->input_arrays,
5384 program->num_input_arrays, &array_id, &array_size)) {
5385 /* We've found an array. Declare it so. */
5386 t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg,
5387 inputSemanticName[i], inputSemanticIndex[i],
5388 interpMode[i], 0, interpLocation[i],
5389 array_id, array_size);
5390 i += array_size - 1;
5391 }
5392 else {
5393 t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg,
5394 inputSemanticName[i], inputSemanticIndex[i],
5395 interpMode[i], 0, interpLocation[i], 0, 1);
5396 }
5397 }
5398 break;
5399 case TGSI_PROCESSOR_GEOMETRY:
5400 case TGSI_PROCESSOR_TESS_EVAL:
5401 case TGSI_PROCESSOR_TESS_CTRL:
5402 for (i = 0; i < numInputs; i++) {
5403 unsigned array_id = 0;
5404 unsigned array_size;
5405
5406 if (find_array(inputSlotToAttr[i], program->input_arrays,
5407 program->num_input_arrays, &array_id, &array_size)) {
5408 /* We've found an array. Declare it so. */
5409 t->inputs[i] = ureg_DECL_input(ureg, inputSemanticName[i],
5410 inputSemanticIndex[i],
5411 array_id, array_size);
5412 i += array_size - 1;
5413 }
5414 else {
5415 t->inputs[i] = ureg_DECL_input(ureg, inputSemanticName[i],
5416 inputSemanticIndex[i], 0, 1);
5417 }
5418 }
5419 break;
5420 case TGSI_PROCESSOR_VERTEX:
5421 for (i = 0; i < numInputs; i++) {
5422 t->inputs[i] = ureg_DECL_vs_input(ureg, i);
5423 }
5424 break;
5425 default:
5426 assert(0);
5427 }
5428
5429 /*
5430 * Declare output attributes.
5431 */
5432 switch (procType) {
5433 case TGSI_PROCESSOR_FRAGMENT:
5434 break;
5435 case TGSI_PROCESSOR_GEOMETRY:
5436 case TGSI_PROCESSOR_TESS_EVAL:
5437 case TGSI_PROCESSOR_TESS_CTRL:
5438 case TGSI_PROCESSOR_VERTEX:
5439 for (i = 0; i < numOutputs; i++) {
5440 unsigned array_id = 0;
5441 unsigned array_size;
5442
5443 if (find_array(outputSlotToAttr[i], program->output_arrays,
5444 program->num_output_arrays, &array_id, &array_size)) {
5445 /* We've found an array. Declare it so. */
5446 t->outputs[i] = ureg_DECL_output_array(ureg,
5447 outputSemanticName[i],
5448 outputSemanticIndex[i],
5449 array_id, array_size);
5450 i += array_size - 1;
5451 }
5452 else {
5453 t->outputs[i] = ureg_DECL_output(ureg,
5454 outputSemanticName[i],
5455 outputSemanticIndex[i]);
5456 }
5457 }
5458 break;
5459 default:
5460 assert(0);
5461 }
5462
5463 if (procType == TGSI_PROCESSOR_FRAGMENT) {
5464 if (proginfo->InputsRead & VARYING_BIT_POS) {
5465 /* Must do this after setting up t->inputs. */
5466 emit_wpos(st_context(ctx), t, proginfo, ureg,
5467 program->wpos_transform_const);
5468 }
5469
5470 if (proginfo->InputsRead & VARYING_BIT_FACE)
5471 emit_face_var(ctx, t);
5472
5473 for (i = 0; i < numOutputs; i++) {
5474 switch (outputSemanticName[i]) {
5475 case TGSI_SEMANTIC_POSITION:
5476 t->outputs[i] = ureg_DECL_output(ureg,
5477 TGSI_SEMANTIC_POSITION, /* Z/Depth */
5478 outputSemanticIndex[i]);
5479 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z);
5480 break;
5481 case TGSI_SEMANTIC_STENCIL:
5482 t->outputs[i] = ureg_DECL_output(ureg,
5483 TGSI_SEMANTIC_STENCIL, /* Stencil */
5484 outputSemanticIndex[i]);
5485 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y);
5486 break;
5487 case TGSI_SEMANTIC_COLOR:
5488 t->outputs[i] = ureg_DECL_output(ureg,
5489 TGSI_SEMANTIC_COLOR,
5490 outputSemanticIndex[i]);
5491 break;
5492 case TGSI_SEMANTIC_SAMPLEMASK:
5493 t->outputs[i] = ureg_DECL_output(ureg,
5494 TGSI_SEMANTIC_SAMPLEMASK,
5495 outputSemanticIndex[i]);
5496 /* TODO: If we ever support more than 32 samples, this will have
5497 * to become an array.
5498 */
5499 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X);
5500 break;
5501 default:
5502 assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR");
5503 ret = PIPE_ERROR_BAD_INPUT;
5504 goto out;
5505 }
5506 }
5507 }
5508 else if (procType == TGSI_PROCESSOR_VERTEX) {
5509 for (i = 0; i < numOutputs; i++) {
5510 if (outputSemanticName[i] == TGSI_SEMANTIC_FOG) {
5511 /* force register to contain a fog coordinate in the form (F, 0, 0, 1). */
5512 ureg_MOV(ureg,
5513 ureg_writemask(t->outputs[i], TGSI_WRITEMASK_YZW),
5514 ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
5515 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X);
5516 }
5517 }
5518 if (passthrough_edgeflags)
5519 emit_edgeflags(t);
5520 }
5521
5522 /* Declare address register.
5523 */
5524 if (program->num_address_regs > 0) {
5525 assert(program->num_address_regs <= 3);
5526 for (int i = 0; i < program->num_address_regs; i++)
5527 t->address[i] = ureg_DECL_address(ureg);
5528 }
5529
5530 /* Declare misc input registers
5531 */
5532 {
5533 GLbitfield sysInputs = proginfo->SystemValuesRead;
5534 unsigned numSys = 0;
5535 for (i = 0; sysInputs; i++) {
5536 if (sysInputs & (1 << i)) {
5537 unsigned semName = _mesa_sysval_to_semantic[i];
5538 t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0);
5539 if (semName == TGSI_SEMANTIC_INSTANCEID ||
5540 semName == TGSI_SEMANTIC_VERTEXID) {
5541 /* From Gallium perspective, these system values are always
5542 * integer, and require native integer support. However, if
5543 * native integer is supported on the vertex stage but not the
5544 * pixel stage (e.g, i915g + draw), Mesa will generate IR that
5545 * assumes these system values are floats. To resolve the
5546 * inconsistency, we insert a U2F.
5547 */
5548 struct st_context *st = st_context(ctx);
5549 struct pipe_screen *pscreen = st->pipe->screen;
5550 assert(procType == TGSI_PROCESSOR_VERTEX);
5551 assert(pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_INTEGERS));
5552 (void) pscreen;
5553 if (!ctx->Const.NativeIntegers) {
5554 struct ureg_dst temp = ureg_DECL_local_temporary(t->ureg);
5555 ureg_U2F( t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X), t->systemValues[i]);
5556 t->systemValues[i] = ureg_scalar(ureg_src(temp), 0);
5557 }
5558 }
5559 numSys++;
5560 sysInputs &= ~(1 << i);
5561 }
5562 }
5563 }
5564
5565 t->array_sizes = program->array_sizes;
5566 t->input_arrays = program->input_arrays;
5567 t->output_arrays = program->output_arrays;
5568
5569 /* Emit constants and uniforms. TGSI uses a single index space for these,
5570 * so we put all the translated regs in t->constants.
5571 */
5572 if (proginfo->Parameters) {
5573 t->constants = (struct ureg_src *)
5574 calloc(proginfo->Parameters->NumParameters, sizeof(t->constants[0]));
5575 if (t->constants == NULL) {
5576 ret = PIPE_ERROR_OUT_OF_MEMORY;
5577 goto out;
5578 }
5579 t->num_constants = proginfo->Parameters->NumParameters;
5580
5581 for (i = 0; i < proginfo->Parameters->NumParameters; i++) {
5582 switch (proginfo->Parameters->Parameters[i].Type) {
5583 case PROGRAM_STATE_VAR:
5584 case PROGRAM_UNIFORM:
5585 t->constants[i] = ureg_DECL_constant(ureg, i);
5586 break;
5587
5588 /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect
5589 * addressing of the const buffer.
5590 * FIXME: Be smarter and recognize param arrays:
5591 * indirect addressing is only valid within the referenced
5592 * array.
5593 */
5594 case PROGRAM_CONSTANT:
5595 if (program->indirect_addr_consts)
5596 t->constants[i] = ureg_DECL_constant(ureg, i);
5597 else
5598 t->constants[i] = emit_immediate(t,
5599 proginfo->Parameters->ParameterValues[i],
5600 proginfo->Parameters->Parameters[i].DataType,
5601 4);
5602 break;
5603 default:
5604 break;
5605 }
5606 }
5607 }
5608
5609 if (program->shader) {
5610 unsigned num_ubos = program->shader->NumUniformBlocks;
5611
5612 for (i = 0; i < num_ubos; i++) {
5613 unsigned size = program->shader->UniformBlocks[i].UniformBufferSize;
5614 unsigned num_const_vecs = (size + 15) / 16;
5615 unsigned first, last;
5616 assert(num_const_vecs > 0);
5617 first = 0;
5618 last = num_const_vecs > 0 ? num_const_vecs - 1 : 0;
5619 ureg_DECL_constant2D(t->ureg, first, last, i + 1);
5620 }
5621 }
5622
5623 /* Emit immediate values.
5624 */
5625 t->immediates = (struct ureg_src *)
5626 calloc(program->num_immediates, sizeof(struct ureg_src));
5627 if (t->immediates == NULL) {
5628 ret = PIPE_ERROR_OUT_OF_MEMORY;
5629 goto out;
5630 }
5631 t->num_immediates = program->num_immediates;
5632
5633 i = 0;
5634 foreach_in_list(immediate_storage, imm, &program->immediates) {
5635 assert(i < program->num_immediates);
5636 t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size32);
5637 }
5638 assert(i == program->num_immediates);
5639
5640 /* texture samplers */
5641 for (i = 0; i < ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; i++) {
5642 if (program->samplers_used & (1 << i)) {
5643 unsigned type;
5644
5645 t->samplers[i] = ureg_DECL_sampler(ureg, i);
5646
5647 switch (program->sampler_types[i]) {
5648 case GLSL_TYPE_INT:
5649 type = TGSI_RETURN_TYPE_SINT;
5650 break;
5651 case GLSL_TYPE_UINT:
5652 type = TGSI_RETURN_TYPE_UINT;
5653 break;
5654 case GLSL_TYPE_FLOAT:
5655 type = TGSI_RETURN_TYPE_FLOAT;
5656 break;
5657 default:
5658 unreachable("not reached");
5659 }
5660
5661 ureg_DECL_sampler_view( ureg, i, program->sampler_targets[i],
5662 type, type, type, type );
5663 }
5664 }
5665
5666 /* Emit each instruction in turn:
5667 */
5668 foreach_in_list(glsl_to_tgsi_instruction, inst, &program->instructions) {
5669 set_insn_start(t, ureg_get_instruction_number(ureg));
5670 compile_tgsi_instruction(t, inst, clamp_color);
5671 }
5672
5673 /* Fix up all emitted labels:
5674 */
5675 for (i = 0; i < t->labels_count; i++) {
5676 ureg_fixup_label(ureg, t->labels[i].token,
5677 t->insn[t->labels[i].branch_target]);
5678 }
5679
5680 out:
5681 if (t) {
5682 free(t->arrays);
5683 free(t->temps);
5684 free(t->insn);
5685 free(t->labels);
5686 free(t->constants);
5687 t->num_constants = 0;
5688 free(t->immediates);
5689 t->num_immediates = 0;
5690
5691 if (t->error) {
5692 debug_printf("%s: translate error flag set\n", __func__);
5693 }
5694
5695 FREE(t);
5696 }
5697
5698 return ret;
5699 }
5700 /* ----------------------------- End TGSI code ------------------------------ */
5701
5702
5703 /**
5704 * Convert a shader's GLSL IR into a Mesa gl_program, although without
5705 * generating Mesa IR.
5706 */
5707 static struct gl_program *
5708 get_mesa_program(struct gl_context *ctx,
5709 struct gl_shader_program *shader_program,
5710 struct gl_shader *shader)
5711 {
5712 glsl_to_tgsi_visitor* v;
5713 struct gl_program *prog;
5714 GLenum target = _mesa_shader_stage_to_program(shader->Stage);
5715 bool progress;
5716 struct gl_shader_compiler_options *options =
5717 &ctx->Const.ShaderCompilerOptions[_mesa_shader_enum_to_shader_stage(shader->Type)];
5718 struct pipe_screen *pscreen = ctx->st->pipe->screen;
5719 unsigned ptarget = st_shader_stage_to_ptarget(shader->Stage);
5720
5721 validate_ir_tree(shader->ir);
5722
5723 prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
5724 if (!prog)
5725 return NULL;
5726 prog->Parameters = _mesa_new_parameter_list();
5727 v = new glsl_to_tgsi_visitor();
5728 v->ctx = ctx;
5729 v->prog = prog;
5730 v->shader_program = shader_program;
5731 v->shader = shader;
5732 v->options = options;
5733 v->glsl_version = ctx->Const.GLSLVersion;
5734 v->native_integers = ctx->Const.NativeIntegers;
5735
5736 v->have_sqrt = pscreen->get_shader_param(pscreen, ptarget,
5737 PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED);
5738 v->have_fma = pscreen->get_shader_param(pscreen, ptarget,
5739 PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED);
5740
5741 _mesa_copy_linked_program_data(shader->Stage, shader_program, prog);
5742 _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
5743 prog->Parameters);
5744
5745 /* Remove reads from output registers. */
5746 lower_output_reads(shader->Stage, shader->ir);
5747
5748 /* Emit intermediate IR for main(). */
5749 visit_exec_list(shader->ir, v);
5750
5751 /* Now emit bodies for any functions that were used. */
5752 do {
5753 progress = GL_FALSE;
5754
5755 foreach_in_list(function_entry, entry, &v->function_signatures) {
5756 if (!entry->bgn_inst) {
5757 v->current_function = entry;
5758
5759 entry->bgn_inst = v->emit_asm(NULL, TGSI_OPCODE_BGNSUB);
5760 entry->bgn_inst->function = entry;
5761
5762 visit_exec_list(&entry->sig->body, v);
5763
5764 glsl_to_tgsi_instruction *last;
5765 last = (glsl_to_tgsi_instruction *)v->instructions.get_tail();
5766 if (last->op != TGSI_OPCODE_RET)
5767 v->emit_asm(NULL, TGSI_OPCODE_RET);
5768
5769 glsl_to_tgsi_instruction *end;
5770 end = v->emit_asm(NULL, TGSI_OPCODE_ENDSUB);
5771 end->function = entry;
5772
5773 progress = GL_TRUE;
5774 }
5775 }
5776 } while (progress);
5777
5778 #if 0
5779 /* Print out some information (for debugging purposes) used by the
5780 * optimization passes. */
5781 {
5782 int i;
5783 int *first_writes = rzalloc_array(v->mem_ctx, int, v->next_temp);
5784 int *first_reads = rzalloc_array(v->mem_ctx, int, v->next_temp);
5785 int *last_writes = rzalloc_array(v->mem_ctx, int, v->next_temp);
5786 int *last_reads = rzalloc_array(v->mem_ctx, int, v->next_temp);
5787
5788 for (i = 0; i < v->next_temp; i++) {
5789 first_writes[i] = -1;
5790 first_reads[i] = -1;
5791 last_writes[i] = -1;
5792 last_reads[i] = -1;
5793 }
5794 v->get_first_temp_read(first_reads);
5795 v->get_last_temp_read_first_temp_write(last_reads, first_writes);
5796 v->get_last_temp_write(last_writes);
5797 for (i = 0; i < v->next_temp; i++)
5798 printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, first_reads[i],
5799 first_writes[i],
5800 last_reads[i],
5801 last_writes[i]);
5802 ralloc_free(first_writes);
5803 ralloc_free(first_reads);
5804 ralloc_free(last_writes);
5805 ralloc_free(last_reads);
5806 }
5807 #endif
5808
5809 /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
5810 v->simplify_cmp();
5811
5812 if (shader->Type != GL_TESS_CONTROL_SHADER &&
5813 shader->Type != GL_TESS_EVALUATION_SHADER)
5814 v->copy_propagate();
5815
5816 while (v->eliminate_dead_code());
5817
5818 v->merge_two_dsts();
5819 v->merge_registers();
5820 v->renumber_registers();
5821
5822 /* Write the END instruction. */
5823 v->emit_asm(NULL, TGSI_OPCODE_END);
5824
5825 if (ctx->_Shader->Flags & GLSL_DUMP) {
5826 _mesa_log("\n");
5827 _mesa_log("GLSL IR for linked %s program %d:\n",
5828 _mesa_shader_stage_to_string(shader->Stage),
5829 shader_program->Name);
5830 _mesa_print_ir(_mesa_get_log_file(), shader->ir, NULL);
5831 _mesa_log("\n\n");
5832 }
5833
5834 prog->Instructions = NULL;
5835 prog->NumInstructions = 0;
5836
5837 do_set_program_inouts(shader->ir, prog, shader->Stage);
5838 shrink_array_declarations(v->input_arrays, v->num_input_arrays,
5839 prog->InputsRead, prog->PatchInputsRead);
5840 shrink_array_declarations(v->output_arrays, v->num_output_arrays,
5841 prog->OutputsWritten, prog->PatchOutputsWritten);
5842 count_resources(v, prog);
5843
5844 /* This must be done before the uniform storage is associated. */
5845 if (shader->Type == GL_FRAGMENT_SHADER &&
5846 prog->InputsRead & VARYING_BIT_POS){
5847 static const gl_state_index wposTransformState[STATE_LENGTH] = {
5848 STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM
5849 };
5850
5851 v->wpos_transform_const = _mesa_add_state_reference(prog->Parameters,
5852 wposTransformState);
5853 }
5854
5855 _mesa_reference_program(ctx, &shader->Program, prog);
5856
5857 /* This has to be done last. Any operation the can cause
5858 * prog->ParameterValues to get reallocated (e.g., anything that adds a
5859 * program constant) has to happen before creating this linkage.
5860 */
5861 _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters);
5862 if (!shader_program->LinkStatus) {
5863 free_glsl_to_tgsi_visitor(v);
5864 return NULL;
5865 }
5866
5867 struct st_vertex_program *stvp;
5868 struct st_fragment_program *stfp;
5869 struct st_geometry_program *stgp;
5870 struct st_tessctrl_program *sttcp;
5871 struct st_tesseval_program *sttep;
5872
5873 switch (shader->Type) {
5874 case GL_VERTEX_SHADER:
5875 stvp = (struct st_vertex_program *)prog;
5876 stvp->glsl_to_tgsi = v;
5877 break;
5878 case GL_FRAGMENT_SHADER:
5879 stfp = (struct st_fragment_program *)prog;
5880 stfp->glsl_to_tgsi = v;
5881 break;
5882 case GL_GEOMETRY_SHADER:
5883 stgp = (struct st_geometry_program *)prog;
5884 stgp->glsl_to_tgsi = v;
5885 break;
5886 case GL_TESS_CONTROL_SHADER:
5887 sttcp = (struct st_tessctrl_program *)prog;
5888 sttcp->glsl_to_tgsi = v;
5889 break;
5890 case GL_TESS_EVALUATION_SHADER:
5891 sttep = (struct st_tesseval_program *)prog;
5892 sttep->glsl_to_tgsi = v;
5893 break;
5894 default:
5895 assert(!"should not be reached");
5896 return NULL;
5897 }
5898
5899 return prog;
5900 }
5901
5902 extern "C" {
5903
5904 static void
5905 st_dump_program_for_shader_db(struct gl_context *ctx,
5906 struct gl_shader_program *prog)
5907 {
5908 /* Dump only successfully compiled and linked shaders to the specified
5909 * file. This is for shader-db.
5910 *
5911 * These options allow some pre-processing of shaders while dumping,
5912 * because some apps have ill-formed shaders.
5913 */
5914 const char *dump_filename = os_get_option("ST_DUMP_SHADERS");
5915 const char *insert_directives = os_get_option("ST_DUMP_INSERT");
5916
5917 if (dump_filename && prog->Name != 0) {
5918 FILE *f = fopen(dump_filename, "a");
5919
5920 if (f) {
5921 for (unsigned i = 0; i < prog->NumShaders; i++) {
5922 const struct gl_shader *sh = prog->Shaders[i];
5923 const char *source;
5924 bool skip_version = false;
5925
5926 if (!sh)
5927 continue;
5928
5929 source = sh->Source;
5930
5931 /* This string mustn't be changed. shader-db uses it to find
5932 * where the shader begins.
5933 */
5934 fprintf(f, "GLSL %s shader %d source for linked program %d:\n",
5935 _mesa_shader_stage_to_string(sh->Stage),
5936 i, prog->Name);
5937
5938 /* Dump the forced version if set. */
5939 if (ctx->Const.ForceGLSLVersion) {
5940 fprintf(f, "#version %i\n", ctx->Const.ForceGLSLVersion);
5941 skip_version = true;
5942 }
5943
5944 /* Insert directives (optional). */
5945 if (insert_directives) {
5946 if (!ctx->Const.ForceGLSLVersion && prog->Version)
5947 fprintf(f, "#version %i\n", prog->Version);
5948 fprintf(f, "%s\n", insert_directives);
5949 skip_version = true;
5950 }
5951
5952 if (skip_version && strncmp(source, "#version ", 9) == 0) {
5953 const char *next_line = strstr(source, "\n");
5954
5955 if (next_line)
5956 source = next_line + 1;
5957 else
5958 continue;
5959 }
5960
5961 fprintf(f, "%s", source);
5962 fprintf(f, "\n");
5963 }
5964 fclose(f);
5965 }
5966 }
5967 }
5968
5969 /**
5970 * Link a shader.
5971 * Called via ctx->Driver.LinkShader()
5972 * This actually involves converting GLSL IR into an intermediate TGSI-like IR
5973 * with code lowering and other optimizations.
5974 */
5975 GLboolean
5976 st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
5977 {
5978 struct pipe_screen *pscreen = ctx->st->pipe->screen;
5979 assert(prog->LinkStatus);
5980
5981 for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
5982 if (prog->_LinkedShaders[i] == NULL)
5983 continue;
5984
5985 bool progress;
5986 exec_list *ir = prog->_LinkedShaders[i]->ir;
5987 gl_shader_stage stage = _mesa_shader_enum_to_shader_stage(prog->_LinkedShaders[i]->Type);
5988 const struct gl_shader_compiler_options *options =
5989 &ctx->Const.ShaderCompilerOptions[stage];
5990 unsigned ptarget = st_shader_stage_to_ptarget(stage);
5991 bool have_dround = pscreen->get_shader_param(pscreen, ptarget,
5992 PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED);
5993 bool have_dfrexp = pscreen->get_shader_param(pscreen, ptarget,
5994 PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED);
5995
5996 /* If there are forms of indirect addressing that the driver
5997 * cannot handle, perform the lowering pass.
5998 */
5999 if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput ||
6000 options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) {
6001 lower_variable_index_to_cond_assign(prog->_LinkedShaders[i]->Stage, ir,
6002 options->EmitNoIndirectInput,
6003 options->EmitNoIndirectOutput,
6004 options->EmitNoIndirectTemp,
6005 options->EmitNoIndirectUniform);
6006 }
6007
6008 if (ctx->Extensions.ARB_shading_language_packing) {
6009 unsigned lower_inst = LOWER_PACK_SNORM_2x16 |
6010 LOWER_UNPACK_SNORM_2x16 |
6011 LOWER_PACK_UNORM_2x16 |
6012 LOWER_UNPACK_UNORM_2x16 |
6013 LOWER_PACK_SNORM_4x8 |
6014 LOWER_UNPACK_SNORM_4x8 |
6015 LOWER_UNPACK_UNORM_4x8 |
6016 LOWER_PACK_UNORM_4x8 |
6017 LOWER_PACK_HALF_2x16 |
6018 LOWER_UNPACK_HALF_2x16;
6019
6020 if (ctx->Extensions.ARB_gpu_shader5)
6021 lower_inst |= LOWER_PACK_USE_BFI |
6022 LOWER_PACK_USE_BFE;
6023
6024 lower_packing_builtins(ir, lower_inst);
6025 }
6026
6027 if (!pscreen->get_param(pscreen, PIPE_CAP_TEXTURE_GATHER_OFFSETS))
6028 lower_offset_arrays(ir);
6029 do_mat_op_to_vec(ir);
6030 lower_instructions(ir,
6031 MOD_TO_FLOOR |
6032 DIV_TO_MUL_RCP |
6033 EXP_TO_EXP2 |
6034 LOG_TO_LOG2 |
6035 LDEXP_TO_ARITH |
6036 (have_dfrexp ? 0 : DFREXP_DLDEXP_TO_ARITH) |
6037 CARRY_TO_ARITH |
6038 BORROW_TO_ARITH |
6039 (have_dround ? 0 : DOPS_TO_DFRAC) |
6040 (options->EmitNoPow ? POW_TO_EXP2 : 0) |
6041 (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0) |
6042 (options->EmitNoSat ? SAT_TO_CLAMP : 0));
6043
6044 lower_ubo_reference(prog->_LinkedShaders[i], ir);
6045 do_vec_index_to_cond_assign(ir);
6046 lower_vector_insert(ir, true);
6047 lower_quadop_vector(ir, false);
6048 lower_noise(ir);
6049 if (options->MaxIfDepth == 0) {
6050 lower_discard(ir);
6051 }
6052
6053 do {
6054 progress = false;
6055
6056 progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
6057
6058 progress = do_common_optimization(ir, true, true, options,
6059 ctx->Const.NativeIntegers)
6060 || progress;
6061
6062 progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress;
6063
6064 } while (progress);
6065
6066 validate_ir_tree(ir);
6067 }
6068
6069 for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
6070 struct gl_program *linked_prog;
6071
6072 if (prog->_LinkedShaders[i] == NULL)
6073 continue;
6074
6075 linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
6076
6077 if (linked_prog) {
6078 _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
6079 linked_prog);
6080 if (!ctx->Driver.ProgramStringNotify(ctx,
6081 _mesa_shader_stage_to_program(i),
6082 linked_prog)) {
6083 _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
6084 NULL);
6085 _mesa_reference_program(ctx, &linked_prog, NULL);
6086 return GL_FALSE;
6087 }
6088 }
6089
6090 _mesa_reference_program(ctx, &linked_prog, NULL);
6091 }
6092
6093 st_dump_program_for_shader_db(ctx, prog);
6094 return GL_TRUE;
6095 }
6096
6097 void
6098 st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi,
6099 const GLuint outputMapping[],
6100 struct pipe_stream_output_info *so)
6101 {
6102 unsigned i;
6103 struct gl_transform_feedback_info *info =
6104 &glsl_to_tgsi->shader_program->LinkedTransformFeedback;
6105
6106 for (i = 0; i < info->NumOutputs; i++) {
6107 so->output[i].register_index =
6108 outputMapping[info->Outputs[i].OutputRegister];
6109 so->output[i].start_component = info->Outputs[i].ComponentOffset;
6110 so->output[i].num_components = info->Outputs[i].NumComponents;
6111 so->output[i].output_buffer = info->Outputs[i].OutputBuffer;
6112 so->output[i].dst_offset = info->Outputs[i].DstOffset;
6113 so->output[i].stream = info->Outputs[i].StreamId;
6114 }
6115
6116 for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
6117 so->stride[i] = info->BufferStride[i];
6118 }
6119 so->num_outputs = info->NumOutputs;
6120 }
6121
6122 } /* extern "C" */