st/mesa: use common double inputs read field
[mesa.git] / src / mesa / state_tracker / st_glsl_to_tgsi.cpp
1 /*
2 * Copyright (C) 2005-2007 Brian Paul All Rights Reserved.
3 * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
4 * Copyright © 2010 Intel Corporation
5 * Copyright © 2011 Bryan Cain
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 * DEALINGS IN THE SOFTWARE.
25 */
26
27 /**
28 * \file glsl_to_tgsi.cpp
29 *
30 * Translate GLSL IR to TGSI.
31 */
32
33 #include "st_glsl_to_tgsi.h"
34
35 #include "compiler/glsl/glsl_parser_extras.h"
36 #include "compiler/glsl/ir_optimization.h"
37 #include "compiler/glsl/program.h"
38
39 #include "main/errors.h"
40 #include "main/shaderobj.h"
41 #include "main/uniforms.h"
42 #include "main/shaderapi.h"
43 #include "main/shaderimage.h"
44 #include "program/prog_instruction.h"
45
46 #include "pipe/p_context.h"
47 #include "pipe/p_screen.h"
48 #include "tgsi/tgsi_ureg.h"
49 #include "tgsi/tgsi_info.h"
50 #include "util/u_math.h"
51 #include "util/u_memory.h"
52 #include "st_program.h"
53 #include "st_mesa_to_tgsi.h"
54 #include "st_format.h"
55 #include "st_glsl_types.h"
56 #include "st_nir.h"
57
58 #include <algorithm>
59
60 #define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) | \
61 (1 << PROGRAM_CONSTANT) | \
62 (1 << PROGRAM_UNIFORM))
63
64 #define MAX_GLSL_TEXTURE_OFFSET 4
65
66 class st_src_reg;
67 class st_dst_reg;
68
69 static int swizzle_for_size(int size);
70
71 static int swizzle_for_type(const glsl_type *type, int component = 0)
72 {
73 unsigned num_elements = 4;
74
75 if (type) {
76 type = type->without_array();
77 if (type->is_scalar() || type->is_vector() || type->is_matrix())
78 num_elements = type->vector_elements;
79 }
80
81 int swizzle = swizzle_for_size(num_elements);
82 assert(num_elements + component <= 4);
83
84 swizzle += component * MAKE_SWIZZLE4(1, 1, 1, 1);
85 return swizzle;
86 }
87
/**
 * This struct is a corresponding struct to TGSI ureg_src.
 */
class st_src_reg {
public:
   /* Construct from a GLSL type: the swizzle is derived from the type's
    * vector width, offset by `component`.  PROGRAM_ARRAY registers must
    * supply a non-zero array_id.
    */
   st_src_reg(gl_register_file file, int index, const glsl_type *type,
              int component = 0, unsigned array_id = 0)
   {
      assert(file != PROGRAM_ARRAY || array_id != 0);
      this->file = file;
      this->index = index;
      this->swizzle = swizzle_for_type(type, component);
      this->negate = 0;
      this->index2D = 0;
      this->type = type ? type->base_type : GLSL_TYPE_ERROR;
      this->reladdr = NULL;
      this->reladdr2 = NULL;
      this->has_index2 = false;
      this->double_reg2 = false;
      this->array_id = array_id;
      this->is_double_vertex_input = false;
   }

   /* Construct with an explicit base type and a full XYZW swizzle. */
   st_src_reg(gl_register_file file, int index, enum glsl_base_type type)
   {
      assert(file != PROGRAM_ARRAY); /* need array_id > 0 */
      this->type = type;
      this->file = file;
      this->index = index;
      this->index2D = 0;
      this->swizzle = SWIZZLE_XYZW;
      this->negate = 0;
      this->reladdr = NULL;
      this->reladdr2 = NULL;
      this->has_index2 = false;
      this->double_reg2 = false;
      this->array_id = 0;
      this->is_double_vertex_input = false;
   }

   /* Same as above but with a second (2D) index, e.g. for constant-buffer
    * style two-dimensional addressing.
    */
   st_src_reg(gl_register_file file, int index, enum glsl_base_type type, int index2D)
   {
      assert(file != PROGRAM_ARRAY); /* need array_id > 0 */
      this->type = type;
      this->file = file;
      this->index = index;
      this->index2D = index2D;
      this->swizzle = SWIZZLE_XYZW;
      this->negate = 0;
      this->reladdr = NULL;
      this->reladdr2 = NULL;
      this->has_index2 = false;
      this->double_reg2 = false;
      this->array_id = 0;
      this->is_double_vertex_input = false;
   }

   /* Default: an undefined register of error type, all fields zeroed. */
   st_src_reg()
   {
      this->type = GLSL_TYPE_ERROR;
      this->file = PROGRAM_UNDEFINED;
      this->index = 0;
      this->index2D = 0;
      this->swizzle = 0;
      this->negate = 0;
      this->reladdr = NULL;
      this->reladdr2 = NULL;
      this->has_index2 = false;
      this->double_reg2 = false;
      this->array_id = 0;
      this->is_double_vertex_input = false;
   }

   /* Conversion from a destination register; defined below st_dst_reg. */
   explicit st_src_reg(st_dst_reg reg);

   int16_t index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
   int16_t index2D;
   uint16_t swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
   int negate:4; /**< NEGATE_XYZW mask from mesa */
   enum glsl_base_type type:4; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
   unsigned has_index2:1;
   gl_register_file file:5; /**< PROGRAM_* from Mesa */
   /*
    * Is this the second half of a double register pair?
    * currently used for input mapping only.
    */
   unsigned double_reg2:1;
   unsigned is_double_vertex_input:1;
   unsigned array_id:10;

   /** Register index should be offset by the integer in this reg. */
   st_src_reg *reladdr;
   st_src_reg *reladdr2;
};
182
/* Destination-register counterpart of st_src_reg (maps to TGSI ureg_dst):
 * a writemask instead of a swizzle, and no negate/double-pair state.
 */
class st_dst_reg {
public:
   /* Construct with an explicit register index. */
   st_dst_reg(gl_register_file file, int writemask, enum glsl_base_type type, int index)
   {
      assert(file != PROGRAM_ARRAY); /* need array_id > 0 */
      this->file = file;
      this->index = index;
      this->index2D = 0;
      this->writemask = writemask;
      this->reladdr = NULL;
      this->reladdr2 = NULL;
      this->has_index2 = false;
      this->type = type;
      this->array_id = 0;
   }

   /* Construct with index 0. */
   st_dst_reg(gl_register_file file, int writemask, enum glsl_base_type type)
   {
      assert(file != PROGRAM_ARRAY); /* need array_id > 0 */
      this->file = file;
      this->index = 0;
      this->index2D = 0;
      this->writemask = writemask;
      this->reladdr = NULL;
      this->reladdr2 = NULL;
      this->has_index2 = false;
      this->type = type;
      this->array_id = 0;
   }

   /* Default: an undefined register of error type, all fields zeroed. */
   st_dst_reg()
   {
      this->type = GLSL_TYPE_ERROR;
      this->file = PROGRAM_UNDEFINED;
      this->index = 0;
      this->index2D = 0;
      this->writemask = 0;
      this->reladdr = NULL;
      this->reladdr2 = NULL;
      this->has_index2 = false;
      this->array_id = 0;
   }

   /* Conversion from a source register; defined below. */
   explicit st_dst_reg(st_src_reg reg);

   int16_t index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
   int16_t index2D;
   gl_register_file file:5; /**< PROGRAM_* from Mesa */
   unsigned writemask:4; /**< Bitfield of WRITEMASK_[XYZW] */
   enum glsl_base_type type:4; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
   unsigned has_index2:1;
   unsigned array_id:10;

   /** Register index should be offset by the integer in this reg. */
   st_src_reg *reladdr;
   st_src_reg *reladdr2;
};
240
241 st_src_reg::st_src_reg(st_dst_reg reg)
242 {
243 this->type = reg.type;
244 this->file = reg.file;
245 this->index = reg.index;
246 this->swizzle = SWIZZLE_XYZW;
247 this->negate = 0;
248 this->reladdr = reg.reladdr;
249 this->index2D = reg.index2D;
250 this->reladdr2 = reg.reladdr2;
251 this->has_index2 = reg.has_index2;
252 this->double_reg2 = false;
253 this->array_id = reg.array_id;
254 this->is_double_vertex_input = false;
255 }
256
257 st_dst_reg::st_dst_reg(st_src_reg reg)
258 {
259 this->type = reg.type;
260 this->file = reg.file;
261 this->index = reg.index;
262 this->writemask = WRITEMASK_XYZW;
263 this->reladdr = reg.reladdr;
264 this->index2D = reg.index2D;
265 this->reladdr2 = reg.reladdr2;
266 this->has_index2 = reg.has_index2;
267 this->array_id = reg.array_id;
268 }
269
/* One translated instruction: a TGSI opcode plus up to two destinations and
 * four sources, with texture/image/buffer metadata for the opcodes that
 * need it.  Lives on glsl_to_tgsi_visitor::instructions (ralloc-allocated).
 */
class glsl_to_tgsi_instruction : public exec_node {
public:
   DECLARE_RALLOC_CXX_OPERATORS(glsl_to_tgsi_instruction)

   st_dst_reg dst[2];
   st_src_reg src[4];
   st_src_reg resource; /**< sampler or buffer register */
   st_src_reg *tex_offsets;

   /** Pointer to the ir source this tree came from for debugging */
   ir_instruction *ir;

   unsigned op:8; /**< TGSI opcode */
   unsigned saturate:1;
   unsigned is_64bit_expanded:1; /* set once 64-bit splitting has run (see emit_asm) */
   unsigned sampler_base:5;
   unsigned sampler_array_size:6; /**< 1-based size of sampler array, 1 if not array */
   unsigned tex_target:4; /**< One of TEXTURE_*_INDEX */
   glsl_base_type tex_type:4;
   unsigned tex_shadow:1;
   unsigned image_format:9;
   unsigned tex_offset_num_offset:3;
   unsigned dead_mask:4; /**< Used in dead code elimination */
   unsigned buffer_access:3; /**< buffer access type */

   /* Cached tgsi_get_opcode_info(op); filled in by emit_asm(). */
   const struct tgsi_opcode_info *info;
};
297
/* Associates an ir_variable with the register (file/index) that backs it. */
class variable_storage : public exec_node {
public:
   variable_storage(ir_variable *var, gl_register_file file, int index,
                    unsigned array_id = 0)
      : file(file), index(index), component(0), var(var), array_id(array_id)
   {
      assert(file != PROGRAM_ARRAY || array_id != 0);
   }

   gl_register_file file;
   int index;

   /* Explicit component location. This is given in terms of the GLSL-style
    * swizzles where each double is a single component, i.e. for 64-bit types
    * it can only be 0 or 1.
    */
   int component;
   ir_variable *var; /* variable that maps to this, if any */
   unsigned array_id;
};
318
319 class immediate_storage : public exec_node {
320 public:
321 immediate_storage(gl_constant_value *values, int size32, int type)
322 {
323 memcpy(this->values, values, size32 * sizeof(gl_constant_value));
324 this->size32 = size32;
325 this->type = type;
326 }
327
328 /* doubles are stored across 2 gl_constant_values */
329 gl_constant_value values[4];
330 int size32; /**< Number of 32-bit components (1-4) */
331 int type; /**< GL_DOUBLE, GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
332 };
333
/* Placeholder registers used to fill unused operand slots in emit_asm(). */
static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
336
/* Records one shader input or output declaration for TGSI emission. */
struct inout_decl {
   unsigned mesa_index;
   unsigned array_id; /* TGSI ArrayID; 1-based: 0 means not an array */
   unsigned size;
   unsigned interp_loc;
   enum glsl_interp_mode interp;
   enum glsl_base_type base_type;
   ubyte usage_mask; /* GLSL-style usage-mask, i.e. single bit per double */
};
346
347 static struct inout_decl *
348 find_inout_array(struct inout_decl *decls, unsigned count, unsigned array_id)
349 {
350 assert(array_id != 0);
351
352 for (unsigned i = 0; i < count; i++) {
353 struct inout_decl *decl = &decls[i];
354
355 if (array_id == decl->array_id) {
356 return decl;
357 }
358 }
359
360 return NULL;
361 }
362
363 static enum glsl_base_type
364 find_array_type(struct inout_decl *decls, unsigned count, unsigned array_id)
365 {
366 if (!array_id)
367 return GLSL_TYPE_ERROR;
368 struct inout_decl *decl = find_inout_array(decls, count, array_id);
369 if (decl)
370 return decl->base_type;
371 return GLSL_TYPE_ERROR;
372 }
373
/* One entry of the temporary-register renaming table consumed by
 * rename_temp_registers() (old temp index -> new temp index).
 */
struct rename_reg_pair {
   int old_reg;
   int new_reg;
};
378
/* The main translation pass: walks the GLSL IR of one linked shader stage
 * (via the ir_visitor interface) and accumulates glsl_to_tgsi_instruction
 * objects in `instructions`, plus all the register, immediate and in/out
 * declaration bookkeeping needed to emit TGSI afterwards.  Also hosts the
 * simple post-passes (copy propagation, DCE, register merging/renumbering).
 */
struct glsl_to_tgsi_visitor : public ir_visitor {
public:
   glsl_to_tgsi_visitor();
   ~glsl_to_tgsi_visitor();

   struct gl_context *ctx;
   struct gl_program *prog;
   struct gl_shader_program *shader_program;
   struct gl_linked_shader *shader;
   struct gl_shader_compiler_options *options;

   /* Next free PROGRAM_TEMPORARY index. */
   int next_temp;

   /* Per-array sizes for PROGRAM_ARRAY registers; grows on demand. */
   unsigned *array_sizes;
   unsigned max_num_arrays;
   unsigned next_array;

   struct inout_decl inputs[4 * PIPE_MAX_SHADER_INPUTS];
   unsigned num_inputs;
   unsigned num_input_arrays;
   struct inout_decl outputs[4 * PIPE_MAX_SHADER_OUTPUTS];
   unsigned num_outputs;
   unsigned num_output_arrays;

   int num_address_regs;
   uint32_t samplers_used;
   glsl_base_type sampler_types[PIPE_MAX_SAMPLERS];
   int sampler_targets[PIPE_MAX_SAMPLERS];   /**< One of TGSI_TEXTURE_* */
   int buffers_used;
   int images_used;
   int image_targets[PIPE_MAX_SHADER_IMAGES];
   unsigned image_formats[PIPE_MAX_SHADER_IMAGES];
   bool indirect_addr_consts;
   int wpos_transform_const;

   int glsl_version;
   bool native_integers;
   bool have_sqrt;
   bool have_fma;
   bool use_shared_memory;

   variable_storage *find_variable_storage(ir_variable *var);

   int add_constant(gl_register_file file, gl_constant_value values[8],
                    int size, int datatype, uint16_t *swizzle_out);

   st_src_reg get_temp(const glsl_type *type);
   void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);

   st_src_reg st_src_reg_for_double(double val);
   st_src_reg st_src_reg_for_float(float val);
   st_src_reg st_src_reg_for_int(int val);
   st_src_reg st_src_reg_for_type(enum glsl_base_type type, int val);

   /**
    * \name Visit methods
    *
    * As typical for the visitor pattern, there must be one \c visit method for
    * each concrete subclass of \c ir_instruction. Virtual base classes within
    * the hierarchy should not have \c visit methods.
    */
   /*@{*/
   virtual void visit(ir_variable *);
   virtual void visit(ir_loop *);
   virtual void visit(ir_loop_jump *);
   virtual void visit(ir_function_signature *);
   virtual void visit(ir_function *);
   virtual void visit(ir_expression *);
   virtual void visit(ir_swizzle *);
   virtual void visit(ir_dereference_variable  *);
   virtual void visit(ir_dereference_array *);
   virtual void visit(ir_dereference_record *);
   virtual void visit(ir_assignment *);
   virtual void visit(ir_constant *);
   virtual void visit(ir_call *);
   virtual void visit(ir_return *);
   virtual void visit(ir_discard *);
   virtual void visit(ir_texture *);
   virtual void visit(ir_if *);
   virtual void visit(ir_emit_vertex *);
   virtual void visit(ir_end_primitive *);
   virtual void visit(ir_barrier *);
   /*@}*/

   void visit_expression(ir_expression *, st_src_reg *) ATTRIBUTE_NOINLINE;

   void visit_atomic_counter_intrinsic(ir_call *);
   void visit_ssbo_intrinsic(ir_call *);
   void visit_membar_intrinsic(ir_call *);
   void visit_shared_intrinsic(ir_call *);
   void visit_image_intrinsic(ir_call *);

   st_src_reg result;

   /** List of variable_storage */
   exec_list variables;

   /** List of immediate_storage */
   exec_list immediates;
   unsigned num_immediates;

   /** List of glsl_to_tgsi_instruction */
   exec_list instructions;

   glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op,
                                      st_dst_reg dst = undef_dst,
                                      st_src_reg src0 = undef_src,
                                      st_src_reg src1 = undef_src,
                                      st_src_reg src2 = undef_src,
                                      st_src_reg src3 = undef_src);

   glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op,
                                      st_dst_reg dst, st_dst_reg dst1,
                                      st_src_reg src0 = undef_src,
                                      st_src_reg src1 = undef_src,
                                      st_src_reg src2 = undef_src,
                                      st_src_reg src3 = undef_src);

   unsigned get_opcode(unsigned op,
                       st_dst_reg dst,
                       st_src_reg src0, st_src_reg src1);

   /**
    * Emit the correct dot-product instruction for the type of arguments
    */
   glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir,
                                     st_dst_reg dst,
                                     st_src_reg src0,
                                     st_src_reg src1,
                                     unsigned elements);

   void emit_scalar(ir_instruction *ir, unsigned op,
                    st_dst_reg dst, st_src_reg src0);

   void emit_scalar(ir_instruction *ir, unsigned op,
                    st_dst_reg dst, st_src_reg src0, st_src_reg src1);

   void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);

   void get_deref_offsets(ir_dereference *ir,
                          unsigned *array_size,
                          unsigned *base,
                          uint16_t *index,
                          st_src_reg *reladdr,
                          bool opaque);
   void calc_deref_offsets(ir_dereference *tail,
                           unsigned *array_elements,
                           uint16_t *index,
                           st_src_reg *indirect,
                           unsigned *location);
   st_src_reg canonicalize_gather_offset(st_src_reg offset);

   bool try_emit_mad(ir_expression *ir,
                     int mul_operand);
   bool try_emit_mad_for_and_not(ir_expression *ir,
                                 int mul_operand);

   void emit_swz(ir_expression *ir);

   bool process_move_condition(ir_rvalue *ir);

   void simplify_cmp(void);

   void rename_temp_registers(int num_renames, struct rename_reg_pair *renames);
   void get_first_temp_read(int *first_reads);
   void get_last_temp_read_first_temp_write(int *last_reads, int *first_writes);
   void get_last_temp_write(int *last_writes);

   void copy_propagate(void);
   int eliminate_dead_code(void);

   void merge_two_dsts(void);
   void merge_registers(void);
   void renumber_registers(void);

   void emit_block_mov(ir_assignment *ir, const struct glsl_type *type,
                       st_dst_reg *l, st_src_reg *r,
                       st_src_reg *cond, bool cond_swap);

   void *mem_ctx;
};
560
/* Fixed PROGRAM_ADDRESS registers: index 0 and 1 are the ARL targets used by
 * emit_asm() for relative addressing; index 2 appears reserved for indirect
 * sampler indexing (NOTE(review): inferred from the name — confirm at use
 * sites outside this chunk).
 */
static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 0);
static st_dst_reg address_reg2 = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 1);
static st_dst_reg sampler_reladdr = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 2);
564
/* Append a printf-style error message to the shader program's info log and
 * mark the link as failed.
 */
static void
fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);

static void
fail_link(struct gl_shader_program *prog, const char *fmt, ...)
{
   va_list args;
   va_start(args, fmt);
   ralloc_vasprintf_append(&prog->InfoLog, fmt, args);
   va_end(args);

   prog->LinkStatus = GL_FALSE;
}
578
579 static int
580 swizzle_for_size(int size)
581 {
582 static const int size_swizzles[4] = {
583 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
584 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
585 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
586 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
587 };
588
589 assert((size >= 1) && (size <= 4));
590 return size_swizzles[size - 1];
591 }
592
593 static bool
594 is_resource_instruction(unsigned opcode)
595 {
596 switch (opcode) {
597 case TGSI_OPCODE_RESQ:
598 case TGSI_OPCODE_LOAD:
599 case TGSI_OPCODE_ATOMUADD:
600 case TGSI_OPCODE_ATOMXCHG:
601 case TGSI_OPCODE_ATOMCAS:
602 case TGSI_OPCODE_ATOMAND:
603 case TGSI_OPCODE_ATOMOR:
604 case TGSI_OPCODE_ATOMXOR:
605 case TGSI_OPCODE_ATOMUMIN:
606 case TGSI_OPCODE_ATOMUMAX:
607 case TGSI_OPCODE_ATOMIMIN:
608 case TGSI_OPCODE_ATOMIMAX:
609 return true;
610 default:
611 return false;
612 }
613 }
614
/* Number of destination registers this instruction writes, per its TGSI
 * opcode info.
 */
static unsigned
num_inst_dst_regs(const glsl_to_tgsi_instruction *op)
{
   return op->info->num_dst;
}
620
621 static unsigned
622 num_inst_src_regs(const glsl_to_tgsi_instruction *op)
623 {
624 return op->info->is_tex || is_resource_instruction(op->op) ?
625 op->info->num_src - 1 : op->info->num_src;
626 }
627
/* Core instruction emitter.  Resolves the type-specific opcode, materializes
 * relative addressing through the ADDR registers, appends the instruction to
 * this->instructions, and — for 64-bit operands — expands the single logical
 * instruction into one TGSI instruction per written GLSL channel (see the
 * double-processing comment below).  Returns the (last) emitted instruction.
 */
glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
                               st_dst_reg dst, st_dst_reg dst1,
                               st_src_reg src0, st_src_reg src1,
                               st_src_reg src2, st_src_reg src3)
{
   glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
   int num_reladdr = 0, i, j;
   bool dst_is_64bit[2];

   op = get_opcode(op, dst, src0, src1);

   /* If we have to do relative addressing, we want to load the ARL
    * reg directly for one of the regs, and preload the other reladdr
    * sources into temps.
    */
   num_reladdr += dst.reladdr != NULL || dst.reladdr2;
   num_reladdr += dst1.reladdr != NULL || dst1.reladdr2;
   num_reladdr += src0.reladdr != NULL || src0.reladdr2 != NULL;
   num_reladdr += src1.reladdr != NULL || src1.reladdr2 != NULL;
   num_reladdr += src2.reladdr != NULL || src2.reladdr2 != NULL;
   num_reladdr += src3.reladdr != NULL || src3.reladdr2 != NULL;

   reladdr_to_temp(ir, &src3, &num_reladdr);
   reladdr_to_temp(ir, &src2, &num_reladdr);
   reladdr_to_temp(ir, &src1, &num_reladdr);
   reladdr_to_temp(ir, &src0, &num_reladdr);

   if (dst.reladdr || dst.reladdr2) {
      if (dst.reladdr)
         emit_arl(ir, address_reg, *dst.reladdr);
      if (dst.reladdr2)
         emit_arl(ir, address_reg2, *dst.reladdr2);
      num_reladdr--;
   }
   if (dst1.reladdr) {
      emit_arl(ir, address_reg, *dst1.reladdr);
      num_reladdr--;
   }
   /* Every pending reladdr must have been consumed above. */
   assert(num_reladdr == 0);

   /* inst->op has only 8 bits. */
   STATIC_ASSERT(TGSI_OPCODE_LAST <= 255);

   inst->op = op;
   inst->info = tgsi_get_opcode_info(op);
   inst->dst[0] = dst;
   inst->dst[1] = dst1;
   inst->src[0] = src0;
   inst->src[1] = src1;
   inst->src[2] = src2;
   inst->src[3] = src3;
   inst->is_64bit_expanded = false;
   inst->ir = ir;
   inst->dead_mask = 0;
   /* default to float, for paths where this is not initialized
    * (since 0==UINT which is likely wrong):
    */
   inst->tex_type = GLSL_TYPE_FLOAT;

   /* Update indirect addressing status used by TGSI */
   if (dst.reladdr || dst.reladdr2) {
      switch(dst.file) {
      case PROGRAM_STATE_VAR:
      case PROGRAM_CONSTANT:
      case PROGRAM_UNIFORM:
         this->indirect_addr_consts = true;
         break;
      case PROGRAM_IMMEDIATE:
         assert(!"immediates should not have indirect addressing");
         break;
      default:
         break;
      }
   }
   else {
      for (i = 0; i < 4; i++) {
         if(inst->src[i].reladdr) {
            switch(inst->src[i].file) {
            case PROGRAM_STATE_VAR:
            case PROGRAM_CONSTANT:
            case PROGRAM_UNIFORM:
               this->indirect_addr_consts = true;
               break;
            case PROGRAM_IMMEDIATE:
               assert(!"immediates should not have indirect addressing");
               break;
            default:
               break;
            }
         }
      }
   }

   /*
    * This section contains the double processing.
    * GLSL just represents doubles as single channel values,
    * however most HW and TGSI represent doubles as pairs of register channels.
    *
    * so we have to fixup destination writemask/index and src swizzle/indexes.
    * dest writemasks need to translate from single channel write mask
    * to a dual-channel writemask, but also need to modify the index,
    * if we are touching the Z,W fields in the pre-translated writemask.
    *
    * src channels have similiar index modifications along with swizzle
    * changes to we pick the XY, ZW pairs from the correct index.
    *
    * GLSL [0].x -> TGSI [0].xy
    * GLSL [0].y -> TGSI [0].zw
    * GLSL [0].z -> TGSI [1].xy
    * GLSL [0].w -> TGSI [1].zw
    */
   for (j = 0; j < 2; j++) {
      dst_is_64bit[j] = glsl_base_type_is_64bit(inst->dst[j].type);
      if (!dst_is_64bit[j] && inst->dst[j].file == PROGRAM_OUTPUT && inst->dst[j].type == GLSL_TYPE_ARRAY) {
         enum glsl_base_type type = find_array_type(this->outputs, this->num_outputs, inst->dst[j].array_id);
         if (glsl_base_type_is_64bit(type))
            dst_is_64bit[j] = true;
      }
   }

   if (dst_is_64bit[0] || dst_is_64bit[1] ||
       glsl_base_type_is_64bit(inst->src[0].type)) {
      glsl_to_tgsi_instruction *dinst = NULL;
      int initial_src_swz[4], initial_src_idx[4];
      int initial_dst_idx[2], initial_dst_writemask[2];
      /* select the writemask for dst0 or dst1 */
      unsigned writemask = inst->dst[1].file == PROGRAM_UNDEFINED ? inst->dst[0].writemask : inst->dst[1].writemask;

      /* copy out the writemask, index and swizzles for all src/dsts. */
      for (j = 0; j < 2; j++) {
         initial_dst_writemask[j] = inst->dst[j].writemask;
         initial_dst_idx[j] = inst->dst[j].index;
      }

      for (j = 0; j < 4; j++) {
         initial_src_swz[j] = inst->src[j].swizzle;
         initial_src_idx[j] = inst->src[j].index;
      }

      /*
       * scan all the components in the dst writemask
       * generate an instruction for each of them if required.
       */
      st_src_reg addr;
      while (writemask) {

         int i = u_bit_scan(&writemask);

         /* before emitting the instruction, see if we have to adjust store
          * address */
         if (i > 1 && inst->op == TGSI_OPCODE_STORE &&
             addr.file == PROGRAM_UNDEFINED) {
            /* We have to advance the buffer address by 16 */
            addr = get_temp(glsl_type::uint_type);
            emit_asm(ir, TGSI_OPCODE_UADD, st_dst_reg(addr),
                     inst->src[0], st_src_reg_for_int(16));
         }


         /* first time use previous instruction */
         if (dinst == NULL) {
            dinst = inst;
         } else {
            /* create a new instructions for subsequent attempts */
            dinst = new(mem_ctx) glsl_to_tgsi_instruction();
            *dinst = *inst;
            dinst->next = NULL;
            dinst->prev = NULL;
         }
         this->instructions.push_tail(dinst);
         dinst->is_64bit_expanded = true;

         /* modify the destination if we are splitting */
         for (j = 0; j < 2; j++) {
            if (dst_is_64bit[j]) {
               dinst->dst[j].writemask = (i & 1) ? WRITEMASK_ZW : WRITEMASK_XY;
               dinst->dst[j].index = initial_dst_idx[j];
               if (i > 1) {
                  if (dinst->op == TGSI_OPCODE_STORE) {
                     dinst->src[0] = addr;
                  } else {
                     dinst->dst[j].index++;
                  }
               }
            } else {
               /* if we aren't writing to a double, just get the bit of the initial writemask
                  for this channel */
               dinst->dst[j].writemask = initial_dst_writemask[j] & (1 << i);
            }
         }

         /* modify the src registers */
         for (j = 0; j < 4; j++) {
            int swz = GET_SWZ(initial_src_swz[j], i);

            if (glsl_base_type_is_64bit(dinst->src[j].type)) {
               dinst->src[j].index = initial_src_idx[j];
               if (swz > 1) {
                  dinst->src[j].double_reg2 = true;
                  dinst->src[j].index++;
               }

               if (swz & 1)
                  dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
               else
                  dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);

            } else {
               /* some opcodes are special case in what they use as sources
                  - [FUI]2D/[UI]2I64 is a float/[u]int src0, DLDEXP is integer src1 */
               if (op == TGSI_OPCODE_F2D || op == TGSI_OPCODE_U2D || op == TGSI_OPCODE_I2D ||
                   op == TGSI_OPCODE_I2I64 || op == TGSI_OPCODE_U2I64 ||
                   op == TGSI_OPCODE_DLDEXP ||
                   (op == TGSI_OPCODE_UCMP && dst_is_64bit[0])) {
                  dinst->src[j].swizzle = MAKE_SWIZZLE4(swz, swz, swz, swz);
               }
            }
         }
      }
      inst = dinst;
   } else {
      this->instructions.push_tail(inst);
   }


   return inst;
}
856
/* Single-destination convenience overload: forwards to the two-destination
 * emit_asm() with undef_dst as the second destination.
 */
glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
                               st_dst_reg dst,
                               st_src_reg src0, st_src_reg src1,
                               st_src_reg src2, st_src_reg src3)
{
   return emit_asm(ir, op, dst, undef_dst, src0, src1, src2, src3);
}
865
/**
 * Determines whether to use an integer, unsigned integer, or float opcode
 * based on the operands and input opcode, then emits the result.
 */
unsigned
glsl_to_tgsi_visitor::get_opcode(unsigned op,
                                 st_dst_reg dst,
                                 st_src_reg src0, st_src_reg src1)
{
   enum glsl_base_type type = GLSL_TYPE_FLOAT;

   /* MOV is type-agnostic; pass it through untouched. */
   if (op == TGSI_OPCODE_MOV)
      return op;

   assert(src0.type != GLSL_TYPE_ARRAY);
   assert(src0.type != GLSL_TYPE_STRUCT);
   assert(src1.type != GLSL_TYPE_ARRAY);
   assert(src1.type != GLSL_TYPE_STRUCT);

   /* Pick the type that governs opcode selection: resource ops key off the
    * data operand (src1); otherwise double dominates, then float, and the
    * integer variants are used only on native-integer hardware.
    */
   if (is_resource_instruction(op))
      type = src1.type;
   else if (src0.type == GLSL_TYPE_DOUBLE || src1.type == GLSL_TYPE_DOUBLE)
      type = GLSL_TYPE_DOUBLE;
   else if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
      type = GLSL_TYPE_FLOAT;
   else if (native_integers)
      type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type;

/* Map generic opcode `c` to its float/int/uint/double variant. */
#define case5(c, f, i, u, d) \
   case TGSI_OPCODE_##c: \
      if (type == GLSL_TYPE_DOUBLE) \
         op = TGSI_OPCODE_##d; \
      else if (type == GLSL_TYPE_INT) \
         op = TGSI_OPCODE_##i; \
      else if (type == GLSL_TYPE_UINT) \
         op = TGSI_OPCODE_##u; \
      else \
         op = TGSI_OPCODE_##f; \
      break;

/* Same, without a double variant. */
#define case4(c, f, i, u) \
   case TGSI_OPCODE_##c: \
      if (type == GLSL_TYPE_INT) \
         op = TGSI_OPCODE_##i; \
      else if (type == GLSL_TYPE_UINT) \
         op = TGSI_OPCODE_##u; \
      else \
         op = TGSI_OPCODE_##f; \
      break;

#define case3(f, i, u)  case4(f, f, i, u)
#define case4d(f, i, u, d)  case5(f, f, i, u, d)
#define case3fid(f, i, d) case5(f, f, i, i, d)
#define case2fi(f, i)   case4(f, f, i, i)
#define case2iu(i, u)   case4(i, LAST, i, u)

/* Comparison opcodes: fall back to the legacy SET opcode `c` when the
 * hardware has no native integers.
 */
#define casecomp(c, f, i, u, d) \
   case TGSI_OPCODE_##c: \
      if (type == GLSL_TYPE_DOUBLE) \
         op = TGSI_OPCODE_##d; \
      else if (type == GLSL_TYPE_INT || type == GLSL_TYPE_SUBROUTINE) \
         op = TGSI_OPCODE_##i; \
      else if (type == GLSL_TYPE_UINT) \
         op = TGSI_OPCODE_##u; \
      else if (native_integers) \
         op = TGSI_OPCODE_##f; \
      else \
         op = TGSI_OPCODE_##c; \
      break;

   switch(op) {
      case3fid(ADD, UADD, DADD);
      case3fid(MUL, UMUL, DMUL);
      case3fid(MAD, UMAD, DMAD);
      case3fid(FMA, UMAD, DFMA);
      case3(DIV, IDIV, UDIV);
      case4d(MAX, IMAX, UMAX, DMAX);
      case4d(MIN, IMIN, UMIN, DMIN);
      case2iu(MOD, UMOD);

      casecomp(SEQ, FSEQ, USEQ, USEQ, DSEQ);
      casecomp(SNE, FSNE, USNE, USNE, DSNE);
      casecomp(SGE, FSGE, ISGE, USGE, DSGE);
      casecomp(SLT, FSLT, ISLT, USLT, DSLT);

      case2iu(ISHR, USHR);

      case3fid(SSG, ISSG, DSSG);
      case3fid(ABS, IABS, DABS);

      case2iu(IBFE, UBFE);
      case2iu(IMSB, UMSB);
      case2iu(IMUL_HI, UMUL_HI);

      case3fid(SQRT, SQRT, DSQRT);

      case3fid(RCP, RCP, DRCP);
      case3fid(RSQ, RSQ, DRSQ);

      case3fid(FRC, FRC, DFRAC);
      case3fid(TRUNC, TRUNC, DTRUNC);
      case3fid(CEIL, CEIL, DCEIL);
      case3fid(FLR, FLR, DFLR);
      case3fid(ROUND, ROUND, DROUND);

      case2iu(ATOMIMAX, ATOMUMAX);
      case2iu(ATOMIMIN, ATOMUMIN);

      default: break;
   }

   /* case2iu maps an unsupported float variant to LAST; catch that here. */
   assert(op != TGSI_OPCODE_LAST);
   return op;
}
980
981 glsl_to_tgsi_instruction *
982 glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
983 st_dst_reg dst, st_src_reg src0, st_src_reg src1,
984 unsigned elements)
985 {
986 static const unsigned dot_opcodes[] = {
987 TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
988 };
989
990 return emit_asm(ir, dot_opcodes[elements - 2], dst, src0, src1);
991 }
992
/**
 * Emits TGSI scalar opcodes to produce unique answers across channels.
 *
 * Some TGSI opcodes are scalar-only, like ARB_fp/vp. The src X
 * channel determines the result across all channels. So to do a vec4
 * of this operation, we want to emit a scalar per source channel used
 * to produce dest channels.
 */
void
glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
                                  st_dst_reg dst,
                                  st_src_reg orig_src0, st_src_reg orig_src1)
{
   int i, j;
   /* Channels not in the writemask are marked "done" up front so they
    * are never emitted.
    */
   int done_mask = ~dst.writemask;

   /* TGSI RCP is a scalar operation splatting results to all channels,
    * like ARB_fp/vp. So emit as many RCPs as necessary to cover our
    * dst channels.
    */
   for (i = 0; i < 4; i++) {
      GLuint this_mask = (1 << i);
      st_src_reg src0 = orig_src0;
      st_src_reg src1 = orig_src1;

      if (done_mask & this_mask)
         continue;

      GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
      GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
      for (j = i + 1; j < 4; j++) {
         /* If there is another enabled component in the destination that is
          * derived from the same inputs, generate its value on this pass as
          * well.
          */
         if (!(done_mask & (1 << j)) &&
             GET_SWZ(src0.swizzle, j) == src0_swiz &&
             GET_SWZ(src1.swizzle, j) == src1_swiz) {
            this_mask |= (1 << j);
         }
      }
      /* Splat the selected component across all four source channels so
       * the scalar opcode reads the right value regardless of channel.
       */
      src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
                                   src0_swiz, src0_swiz);
      src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
                                   src1_swiz, src1_swiz);

      /* Write only the channels that share this source component. */
      dst.writemask = this_mask;
      emit_asm(ir, op, dst, src0, src1);
      done_mask |= this_mask;
   }
}
1044
1045 void
1046 glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
1047 st_dst_reg dst, st_src_reg src0)
1048 {
1049 st_src_reg undef = undef_src;
1050
1051 undef.swizzle = SWIZZLE_XXXX;
1052
1053 emit_scalar(ir, op, dst, src0, undef);
1054 }
1055
1056 void
1057 glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
1058 st_dst_reg dst, st_src_reg src0)
1059 {
1060 int op = TGSI_OPCODE_ARL;
1061
1062 if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT)
1063 op = TGSI_OPCODE_UARL;
1064
1065 assert(dst.file == PROGRAM_ADDRESS);
1066 if (dst.index >= this->num_address_regs)
1067 this->num_address_regs = dst.index + 1;
1068
1069 emit_asm(NULL, op, dst, src0);
1070 }
1071
/**
 * Add a constant either to the parameter list (PROGRAM_CONSTANT) or to
 * the immediate list (PROGRAM_IMMEDIATE), deduplicating immediates.
 *
 * \param size         number of components; a GL_DOUBLE component expands
 *                     to two 32-bit slots internally.
 * \param swizzle_out  in/out swizzle, updated for PROGRAM_CONSTANT entries.
 * \return index of the (first) matching or newly-added entry.
 */
int
glsl_to_tgsi_visitor::add_constant(gl_register_file file,
                                   gl_constant_value values[8], int size, int datatype,
                                   uint16_t *swizzle_out)
{
   if (file == PROGRAM_CONSTANT) {
      GLuint swizzle = swizzle_out ? *swizzle_out : 0;
      int result = _mesa_add_typed_unnamed_constant(this->prog->Parameters, values,
                                                    size, datatype, &swizzle);
      if (swizzle_out)
         *swizzle_out = swizzle;
      return result;
   }

   assert(file == PROGRAM_IMMEDIATE);

   int index = 0;
   /* NOTE(review): this "entry" is shadowed by the iterator that
    * foreach_in_list declares below; it is only used for the appends in
    * the final loop.
    */
   immediate_storage *entry;
   /* Doubles take two 32-bit components each. */
   int size32 = size * (datatype == GL_DOUBLE ? 2 : 1);
   int i;

   /* Search immediate storage to see if we already have an identical
    * immediate that we can use instead of adding a duplicate entry.
    */
   foreach_in_list(immediate_storage, entry, &this->immediates) {
      immediate_storage *tmp = entry;

      /* A value wider than vec4 spans consecutive list entries; walk
       * them until the full size is matched or a mismatch is found.
       */
      for (i = 0; i * 4 < size32; i++) {
         int slot_size = MIN2(size32 - (i * 4), 4);
         if (tmp->type != datatype || tmp->size32 != slot_size)
            break;
         if (memcmp(tmp->values, &values[i * 4],
                    slot_size * sizeof(gl_constant_value)))
            break;

         /* Everything matches, keep going until the full size is matched */
         tmp = (immediate_storage *)tmp->next;
      }

      /* The full value matched */
      if (i * 4 >= size32)
         return index;

      index++;
   }

   /* No match found: append one immediate entry per vec4-sized slot. */
   for (i = 0; i * 4 < size32; i++) {
      int slot_size = MIN2(size32 - (i * 4), 4);
      /* Add this immediate to the list. */
      entry = new(mem_ctx) immediate_storage(&values[i * 4], slot_size, datatype);
      this->immediates.push_tail(entry);
      this->num_immediates++;
   }
   return index;
}
1127
1128 st_src_reg
1129 glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
1130 {
1131 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT);
1132 union gl_constant_value uval;
1133
1134 uval.f = val;
1135 src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle);
1136
1137 return src;
1138 }
1139
1140 st_src_reg
1141 glsl_to_tgsi_visitor::st_src_reg_for_double(double val)
1142 {
1143 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_DOUBLE);
1144 union gl_constant_value uval[2];
1145
1146 memcpy(uval, &val, sizeof(uval));
1147 src.index = add_constant(src.file, uval, 1, GL_DOUBLE, &src.swizzle);
1148 src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
1149 return src;
1150 }
1151
1152 st_src_reg
1153 glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
1154 {
1155 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
1156 union gl_constant_value uval;
1157
1158 assert(native_integers);
1159
1160 uval.i = val;
1161 src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle);
1162
1163 return src;
1164 }
1165
1166 st_src_reg
1167 glsl_to_tgsi_visitor::st_src_reg_for_type(enum glsl_base_type type, int val)
1168 {
1169 if (native_integers)
1170 return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) :
1171 st_src_reg_for_int(val);
1172 else
1173 return st_src_reg_for_float(val);
1174 }
1175
/* Thin local wrapper over the shared st helper; see
 * st_glsl_attrib_type_size() for the actual sizing rules.
 */
static int
attrib_type_size(const struct glsl_type *type, bool is_vs_input)
{
   return st_glsl_attrib_type_size(type, is_vs_input);
}
1181
/* Thin local wrapper over the shared st helper; see st_glsl_type_size()
 * for how many register slots a GLSL type occupies.
 */
static int
type_size(const struct glsl_type *type)
{
   return st_glsl_type_size(type);
}
1187
1188 /**
1189 * If the given GLSL type is an array or matrix or a structure containing
1190 * an array/matrix member, return true. Else return false.
1191 *
1192 * This is used to determine which kind of temp storage (PROGRAM_TEMPORARY
1193 * or PROGRAM_ARRAY) should be used for variables of this type. Anytime
1194 * we have an array that might be indexed with a variable, we need to use
1195 * the later storage type.
1196 */
1197 static bool
1198 type_has_array_or_matrix(const glsl_type *type)
1199 {
1200 if (type->is_array() || type->is_matrix())
1201 return true;
1202
1203 if (type->is_record()) {
1204 for (unsigned i = 0; i < type->length; i++) {
1205 if (type_has_array_or_matrix(type->fields.structure[i].type)) {
1206 return true;
1207 }
1208 }
1209 }
1210
1211 return false;
1212 }
1213
1214
1215 /**
1216 * In the initial pass of codegen, we assign temporary numbers to
1217 * intermediate results. (not SSA -- variable assignments will reuse
1218 * storage).
1219 */
1220 st_src_reg
1221 glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
1222 {
1223 st_src_reg src;
1224
1225 src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
1226 src.reladdr = NULL;
1227 src.negate = 0;
1228
1229 if (!options->EmitNoIndirectTemp && type_has_array_or_matrix(type)) {
1230 if (next_array >= max_num_arrays) {
1231 max_num_arrays += 32;
1232 array_sizes = (unsigned*)
1233 realloc(array_sizes, sizeof(array_sizes[0]) * max_num_arrays);
1234 }
1235
1236 src.file = PROGRAM_ARRAY;
1237 src.index = 0;
1238 src.array_id = next_array + 1;
1239 array_sizes[next_array] = type_size(type);
1240 ++next_array;
1241
1242 } else {
1243 src.file = PROGRAM_TEMPORARY;
1244 src.index = next_temp;
1245 next_temp += type_size(type);
1246 }
1247
1248 if (type->is_array() || type->is_record()) {
1249 src.swizzle = SWIZZLE_NOOP;
1250 } else {
1251 src.swizzle = swizzle_for_size(type->vector_elements);
1252 }
1253
1254 return src;
1255 }
1256
1257 variable_storage *
1258 glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
1259 {
1260
1261 foreach_in_list(variable_storage, entry, &this->variables) {
1262 if (entry->var == var)
1263 return entry;
1264 }
1265
1266 return NULL;
1267 }
1268
/**
 * Handle a variable declaration.  Records gl_FragCoord layout qualifiers
 * on the program, and wires built-in "gl_*" uniforms either directly to
 * the STATE file (when their swizzle layout matches) or through a copy
 * into temporaries.
 */
void
glsl_to_tgsi_visitor::visit(ir_variable *ir)
{
   if (strcmp(ir->name, "gl_FragCoord") == 0) {
      this->prog->OriginUpperLeft = ir->data.origin_upper_left;
      this->prog->PixelCenterInteger = ir->data.pixel_center_integer;
   }

   if (ir->data.mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
      unsigned int i;
      const ir_state_slot *const slots = ir->get_state_slots();
      assert(slots != NULL);

      /* Check if this statevar's setup in the STATE file exactly
       * matches how we'll want to reference it as a
       * struct/array/whatever. If not, then we need to move it into
       * temporary storage and hope that it'll get copy-propagated
       * out.
       */
      for (i = 0; i < ir->get_num_state_slots(); i++) {
         if (slots[i].swizzle != SWIZZLE_XYZW) {
            break;
         }
      }

      variable_storage *storage;
      st_dst_reg dst;
      if (i == ir->get_num_state_slots()) {
         /* All slots are plain XYZW: reference the STATE file directly.
          * We'll set the index later.
          */
         storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
         this->variables.push_tail(storage);

         dst = undef_dst;
      } else {
         /* The variable_storage constructor allocates slots based on the size
          * of the type. However, this had better match the number of state
          * elements that we're going to copy into the new temporary.
          */
         assert((int) ir->get_num_state_slots() == type_size(ir->type));

         dst = st_dst_reg(get_temp(ir->type));

         storage = new(mem_ctx) variable_storage(ir, dst.file, dst.index,
                                                 dst.array_id);

         this->variables.push_tail(storage);
      }


      for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) {
         int index = _mesa_add_state_reference(this->prog->Parameters,
                                               (gl_state_index *)slots[i].tokens);

         if (storage->file == PROGRAM_STATE_VAR) {
            if (storage->index == -1) {
               /* First slot: remember where the state vars begin. */
               storage->index = index;
            } else {
               /* Subsequent slots must be contiguous in the STATE file. */
               assert(index == storage->index + (int)i);
            }
         } else {
            /* We use GLSL_TYPE_FLOAT here regardless of the actual type of
             * the data being moved since MOV does not care about the type of
             * data it is moving, and we don't want to declare registers with
             * array or struct types.
             */
            st_src_reg src(PROGRAM_STATE_VAR, index, GLSL_TYPE_FLOAT);
            src.swizzle = slots[i].swizzle;
            emit_asm(ir, TGSI_OPCODE_MOV, dst, src);
            /* even a float takes up a whole vec4 reg in a struct/array. */
            dst.index++;
         }
      }

      if (storage->file == PROGRAM_TEMPORARY &&
          dst.index != storage->index + (int) ir->get_num_state_slots()) {
         fail_link(this->shader_program,
                   "failed to load builtin uniform `%s' (%d/%d regs loaded)\n",
                   ir->name, dst.index - storage->index,
                   type_size(ir->type));
      }
   }
}
1351
1352 void
1353 glsl_to_tgsi_visitor::visit(ir_loop *ir)
1354 {
1355 emit_asm(NULL, TGSI_OPCODE_BGNLOOP);
1356
1357 visit_exec_list(&ir->body_instructions, this);
1358
1359 emit_asm(NULL, TGSI_OPCODE_ENDLOOP);
1360 }
1361
1362 void
1363 glsl_to_tgsi_visitor::visit(ir_loop_jump *ir)
1364 {
1365 switch (ir->mode) {
1366 case ir_loop_jump::jump_break:
1367 emit_asm(NULL, TGSI_OPCODE_BRK);
1368 break;
1369 case ir_loop_jump::jump_continue:
1370 emit_asm(NULL, TGSI_OPCODE_CONT);
1371 break;
1372 }
1373 }
1374
1375
1376 void
1377 glsl_to_tgsi_visitor::visit(ir_function_signature *ir)
1378 {
1379 assert(0);
1380 (void)ir;
1381 }
1382
1383 void
1384 glsl_to_tgsi_visitor::visit(ir_function *ir)
1385 {
1386 /* Ignore function bodies other than main() -- we shouldn't see calls to
1387 * them since they should all be inlined before we get to glsl_to_tgsi.
1388 */
1389 if (strcmp(ir->name, "main") == 0) {
1390 const ir_function_signature *sig;
1391 exec_list empty;
1392
1393 sig = ir->matching_signature(NULL, &empty, false);
1394
1395 assert(sig);
1396
1397 foreach_in_list(ir_instruction, ir, &sig->body) {
1398 ir->accept(this);
1399 }
1400 }
1401 }
1402
/**
 * Try to fuse ADD(MUL(a, b), c) into a single MAD.
 *
 * \param mul_operand  which operand of the ADD (0 or 1) to test for
 *                     being a multiply.
 * \return true if a MAD was emitted (this->result holds its value),
 *         false if that operand is not a multiply.
 */
bool
glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
{
   int nonmul_operand = 1 - mul_operand;
   st_src_reg a, b, c;
   st_dst_reg result_dst;

   ir_expression *expr = ir->operands[mul_operand]->as_expression();
   if (!expr || expr->operation != ir_binop_mul)
      return false;

   /* Evaluate the three operands; each accept() leaves its register in
    * this->result.  The order matters: side-effecting instructions are
    * emitted as each operand is visited.
    */
   expr->operands[0]->accept(this);
   a = this->result;
   expr->operands[1]->accept(this);
   b = this->result;
   ir->operands[nonmul_operand]->accept(this);
   c = this->result;

   this->result = get_temp(ir->type);
   result_dst = st_dst_reg(this->result);
   /* Only write the channels the expression actually produces. */
   result_dst.writemask = (1 << ir->type->vector_elements) - 1;
   emit_asm(ir, TGSI_OPCODE_MAD, result_dst, a, b, c);

   return true;
}
1428
/**
 * Emit MAD(a, -b, a) instead of AND(a, NOT(b))
 *
 * The logic values are 1.0 for true and 0.0 for false. Logical-and is
 * implemented using multiplication, and logical-or is implemented using
 * addition. Logical-not can be implemented as (true - x), or (1.0 - x).
 * As result, the logical expression (a & !b) can be rewritten as:
 *
 * - a * !b
 * - a * (1 - b)
 * - (a * 1) - (a * b)
 * - a + -(a * b)
 * - a + (a * -b)
 *
 * This final expression can be implemented as a single MAD(a, -b, a)
 * instruction.
 */
bool
glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
{
   const int other_operand = 1 - try_operand;
   st_src_reg a, b;

   /* Only applies when the tested operand is a logical NOT. */
   ir_expression *expr = ir->operands[try_operand]->as_expression();
   if (!expr || expr->operation != ir_unop_logic_not)
      return false;

   ir->operands[other_operand]->accept(this);
   a = this->result;
   expr->operands[0]->accept(this);
   b = this->result;

   /* Fold the NOT into a negate modifier on b (valid for 0.0/1.0
    * float booleans, per the derivation above).
    */
   b.negate = ~b.negate;

   this->result = get_temp(ir->type);
   emit_asm(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a);

   return true;
}
1468
1469 void
1470 glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
1471 st_src_reg *reg, int *num_reladdr)
1472 {
1473 if (!reg->reladdr && !reg->reladdr2)
1474 return;
1475
1476 if (reg->reladdr) emit_arl(ir, address_reg, *reg->reladdr);
1477 if (reg->reladdr2) emit_arl(ir, address_reg2, *reg->reladdr2);
1478
1479 if (*num_reladdr != 1) {
1480 st_src_reg temp = get_temp(reg->type == GLSL_TYPE_DOUBLE ? glsl_type::dvec4_type : glsl_type::vec4_type);
1481
1482 emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg);
1483 *reg = temp;
1484 }
1485
1486 (*num_reladdr)--;
1487 }
1488
/**
 * Visit an expression: try the MAD-forming peepholes first, otherwise
 * evaluate every operand into a register and hand off to
 * visit_expression() for opcode selection.
 */
void
glsl_to_tgsi_visitor::visit(ir_expression *ir)
{
   st_src_reg op[ARRAY_SIZE(ir->operands)];

   /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c)
    */
   if (ir->operation == ir_binop_add) {
      if (try_emit_mad(ir, 1))
         return;
      if (try_emit_mad(ir, 0))
         return;
   }

   /* Quick peephole: Emit MAD(a, -b, a) instead of AND(a, NOT(b)),
    * valid only for float 0.0/1.0 booleans.
    */
   if (!native_integers && ir->operation == ir_binop_logic_and) {
      if (try_emit_mad_for_and_not(ir, 1))
         return;
      if (try_emit_mad_for_and_not(ir, 0))
         return;
   }

   if (ir->operation == ir_quadop_vector)
      assert(!"ir_quadop_vector should have been lowered");

   for (unsigned int operand = 0; operand < ir->get_num_operands(); operand++) {
      this->result.file = PROGRAM_UNDEFINED;
      ir->operands[operand]->accept(this);
      if (this->result.file == PROGRAM_UNDEFINED) {
         printf("Failed to get tree for expression operand:\n");
         ir->operands[operand]->print();
         printf("\n");
         exit(1);
      }
      op[operand] = this->result;

      /* Matrix expression operands should have been broken down to vector
       * operations already.
       */
      assert(!ir->operands[operand]->type->is_matrix());
   }

   visit_expression(ir, op);
}
1534
1535 /* The non-recursive part of the expression visitor lives in a separate
1536 * function and should be prevented from being inlined, to avoid a stack
1537 * explosion when deeply nested expressions are visited.
1538 */
1539 void
1540 glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
1541 {
1542 st_src_reg result_src;
1543 st_dst_reg result_dst;
1544
1545 int vector_elements = ir->operands[0]->type->vector_elements;
1546 if (ir->operands[1]) {
1547 vector_elements = MAX2(vector_elements,
1548 ir->operands[1]->type->vector_elements);
1549 }
1550
1551 this->result.file = PROGRAM_UNDEFINED;
1552
1553 /* Storage for our result. Ideally for an assignment we'd be using
1554 * the actual storage for the result here, instead.
1555 */
1556 result_src = get_temp(ir->type);
1557 /* convenience for the emit functions below. */
1558 result_dst = st_dst_reg(result_src);
1559 /* Limit writes to the channels that will be used by result_src later.
1560 * This does limit this temp's use as a temporary for multi-instruction
1561 * sequences.
1562 */
1563 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1564
1565 switch (ir->operation) {
1566 case ir_unop_logic_not:
1567 if (result_dst.type != GLSL_TYPE_FLOAT)
1568 emit_asm(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
1569 else {
1570 /* Previously 'SEQ dst, src, 0.0' was used for this. However, many
1571 * older GPUs implement SEQ using multiple instructions (i915 uses two
1572 * SGE instructions and a MUL instruction). Since our logic values are
1573 * 0.0 and 1.0, 1-x also implements !x.
1574 */
1575 op[0].negate = ~op[0].negate;
1576 emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0));
1577 }
1578 break;
1579 case ir_unop_neg:
1580 if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT)
1581 emit_asm(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
1582 else if (result_dst.type == GLSL_TYPE_DOUBLE)
1583 emit_asm(ir, TGSI_OPCODE_DNEG, result_dst, op[0]);
1584 else {
1585 op[0].negate = ~op[0].negate;
1586 result_src = op[0];
1587 }
1588 break;
1589 case ir_unop_subroutine_to_int:
1590 emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
1591 break;
1592 case ir_unop_abs:
1593 emit_asm(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
1594 break;
1595 case ir_unop_sign:
1596 emit_asm(ir, TGSI_OPCODE_SSG, result_dst, op[0]);
1597 break;
1598 case ir_unop_rcp:
1599 emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]);
1600 break;
1601
1602 case ir_unop_exp2:
1603 emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]);
1604 break;
1605 case ir_unop_exp:
1606 case ir_unop_log:
1607 assert(!"not reached: should be handled by ir_explog_to_explog2");
1608 break;
1609 case ir_unop_log2:
1610 emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]);
1611 break;
1612 case ir_unop_sin:
1613 emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
1614 break;
1615 case ir_unop_cos:
1616 emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]);
1617 break;
1618 case ir_unop_saturate: {
1619 glsl_to_tgsi_instruction *inst;
1620 inst = emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
1621 inst->saturate = true;
1622 break;
1623 }
1624
1625 case ir_unop_dFdx:
1626 case ir_unop_dFdx_coarse:
1627 emit_asm(ir, TGSI_OPCODE_DDX, result_dst, op[0]);
1628 break;
1629 case ir_unop_dFdx_fine:
1630 emit_asm(ir, TGSI_OPCODE_DDX_FINE, result_dst, op[0]);
1631 break;
1632 case ir_unop_dFdy:
1633 case ir_unop_dFdy_coarse:
1634 case ir_unop_dFdy_fine:
1635 {
1636 /* The X component contains 1 or -1 depending on whether the framebuffer
1637 * is a FBO or the window system buffer, respectively.
1638 * It is then multiplied with the source operand of DDY.
1639 */
1640 static const gl_state_index transform_y_state[STATE_LENGTH]
1641 = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM };
1642
1643 unsigned transform_y_index =
1644 _mesa_add_state_reference(this->prog->Parameters,
1645 transform_y_state);
1646
1647 st_src_reg transform_y = st_src_reg(PROGRAM_STATE_VAR,
1648 transform_y_index,
1649 glsl_type::vec4_type);
1650 transform_y.swizzle = SWIZZLE_XXXX;
1651
1652 st_src_reg temp = get_temp(glsl_type::vec4_type);
1653
1654 emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(temp), transform_y, op[0]);
1655 emit_asm(ir, ir->operation == ir_unop_dFdy_fine ?
1656 TGSI_OPCODE_DDY_FINE : TGSI_OPCODE_DDY, result_dst, temp);
1657 break;
1658 }
1659
1660 case ir_unop_frexp_sig:
1661 emit_asm(ir, TGSI_OPCODE_DFRACEXP, result_dst, undef_dst, op[0]);
1662 break;
1663
1664 case ir_unop_frexp_exp:
1665 emit_asm(ir, TGSI_OPCODE_DFRACEXP, undef_dst, result_dst, op[0]);
1666 break;
1667
1668 case ir_unop_noise: {
1669 /* At some point, a motivated person could add a better
1670 * implementation of noise. Currently not even the nvidia
1671 * binary drivers do anything more than this. In any case, the
1672 * place to do this is in the GL state tracker, not the poor
1673 * driver.
1674 */
1675 emit_asm(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5));
1676 break;
1677 }
1678
1679 case ir_binop_add:
1680 emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
1681 break;
1682 case ir_binop_sub:
1683 emit_asm(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]);
1684 break;
1685
1686 case ir_binop_mul:
1687 emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
1688 break;
1689 case ir_binop_div:
1690 if (result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_DOUBLE)
1691 assert(!"not reached: should be handled by ir_div_to_mul_rcp");
1692 else
1693 emit_asm(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
1694 break;
1695 case ir_binop_mod:
1696 if (result_dst.type == GLSL_TYPE_FLOAT)
1697 assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
1698 else
1699 emit_asm(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]);
1700 break;
1701
1702 case ir_binop_less:
1703 emit_asm(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
1704 break;
1705 case ir_binop_greater:
1706 emit_asm(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]);
1707 break;
1708 case ir_binop_lequal:
1709 emit_asm(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]);
1710 break;
1711 case ir_binop_gequal:
1712 emit_asm(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
1713 break;
1714 case ir_binop_equal:
1715 emit_asm(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
1716 break;
1717 case ir_binop_nequal:
1718 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1719 break;
1720 case ir_binop_all_equal:
1721 /* "==" operator producing a scalar boolean. */
1722 if (ir->operands[0]->type->is_vector() ||
1723 ir->operands[1]->type->is_vector()) {
1724 st_src_reg temp = get_temp(native_integers ?
1725 glsl_type::uvec4_type :
1726 glsl_type::vec4_type);
1727
1728 if (native_integers) {
1729 st_dst_reg temp_dst = st_dst_reg(temp);
1730 st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
1731
1732 if (ir->operands[0]->type->is_boolean() &&
1733 ir->operands[1]->as_constant() &&
1734 ir->operands[1]->as_constant()->is_one()) {
1735 emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), op[0]);
1736 } else {
1737 emit_asm(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]);
1738 }
1739
1740 /* Emit 1-3 AND operations to combine the SEQ results. */
1741 switch (ir->operands[0]->type->vector_elements) {
1742 case 2:
1743 break;
1744 case 3:
1745 temp_dst.writemask = WRITEMASK_Y;
1746 temp1.swizzle = SWIZZLE_YYYY;
1747 temp2.swizzle = SWIZZLE_ZZZZ;
1748 emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
1749 break;
1750 case 4:
1751 temp_dst.writemask = WRITEMASK_X;
1752 temp1.swizzle = SWIZZLE_XXXX;
1753 temp2.swizzle = SWIZZLE_YYYY;
1754 emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
1755 temp_dst.writemask = WRITEMASK_Y;
1756 temp1.swizzle = SWIZZLE_ZZZZ;
1757 temp2.swizzle = SWIZZLE_WWWW;
1758 emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
1759 }
1760
1761 temp1.swizzle = SWIZZLE_XXXX;
1762 temp2.swizzle = SWIZZLE_YYYY;
1763 emit_asm(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2);
1764 } else {
1765 emit_asm(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
1766
1767 /* After the dot-product, the value will be an integer on the
1768 * range [0,4]. Zero becomes 1.0, and positive values become zero.
1769 */
1770 emit_dp(ir, result_dst, temp, temp, vector_elements);
1771
1772 /* Negating the result of the dot-product gives values on the range
1773 * [-4, 0]. Zero becomes 1.0, and negative values become zero.
1774 * This is achieved using SGE.
1775 */
1776 st_src_reg sge_src = result_src;
1777 sge_src.negate = ~sge_src.negate;
1778 emit_asm(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0));
1779 }
1780 } else {
1781 emit_asm(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
1782 }
1783 break;
1784 case ir_binop_any_nequal:
1785 /* "!=" operator producing a scalar boolean. */
1786 if (ir->operands[0]->type->is_vector() ||
1787 ir->operands[1]->type->is_vector()) {
1788 st_src_reg temp = get_temp(native_integers ?
1789 glsl_type::uvec4_type :
1790 glsl_type::vec4_type);
1791 if (ir->operands[0]->type->is_boolean() &&
1792 ir->operands[1]->as_constant() &&
1793 ir->operands[1]->as_constant()->is_zero()) {
1794 emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), op[0]);
1795 } else {
1796 emit_asm(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
1797 }
1798
1799 if (native_integers) {
1800 st_dst_reg temp_dst = st_dst_reg(temp);
1801 st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
1802
1803 /* Emit 1-3 OR operations to combine the SNE results. */
1804 switch (ir->operands[0]->type->vector_elements) {
1805 case 2:
1806 break;
1807 case 3:
1808 temp_dst.writemask = WRITEMASK_Y;
1809 temp1.swizzle = SWIZZLE_YYYY;
1810 temp2.swizzle = SWIZZLE_ZZZZ;
1811 emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
1812 break;
1813 case 4:
1814 temp_dst.writemask = WRITEMASK_X;
1815 temp1.swizzle = SWIZZLE_XXXX;
1816 temp2.swizzle = SWIZZLE_YYYY;
1817 emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
1818 temp_dst.writemask = WRITEMASK_Y;
1819 temp1.swizzle = SWIZZLE_ZZZZ;
1820 temp2.swizzle = SWIZZLE_WWWW;
1821 emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
1822 }
1823
1824 temp1.swizzle = SWIZZLE_XXXX;
1825 temp2.swizzle = SWIZZLE_YYYY;
1826 emit_asm(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2);
1827 } else {
1828 /* After the dot-product, the value will be an integer on the
1829 * range [0,4]. Zero stays zero, and positive values become 1.0.
1830 */
1831 glsl_to_tgsi_instruction *const dp =
1832 emit_dp(ir, result_dst, temp, temp, vector_elements);
1833 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
1834 /* The clamping to [0,1] can be done for free in the fragment
1835 * shader with a saturate.
1836 */
1837 dp->saturate = true;
1838 } else {
1839 /* Negating the result of the dot-product gives values on the range
1840 * [-4, 0]. Zero stays zero, and negative values become 1.0. This
1841 * achieved using SLT.
1842 */
1843 st_src_reg slt_src = result_src;
1844 slt_src.negate = ~slt_src.negate;
1845 emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
1846 }
1847 }
1848 } else {
1849 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1850 }
1851 break;
1852
1853 case ir_binop_logic_xor:
1854 if (native_integers)
1855 emit_asm(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
1856 else
1857 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1858 break;
1859
1860 case ir_binop_logic_or: {
1861 if (native_integers) {
1862 /* If integers are used as booleans, we can use an actual "or"
1863 * instruction.
1864 */
1865 assert(native_integers);
1866 emit_asm(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
1867 } else {
1868 /* After the addition, the value will be an integer on the
1869 * range [0,2]. Zero stays zero, and positive values become 1.0.
1870 */
1871 glsl_to_tgsi_instruction *add =
1872 emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
1873 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
1874 /* The clamping to [0,1] can be done for free in the fragment
1875 * shader with a saturate if floats are being used as boolean values.
1876 */
1877 add->saturate = true;
1878 } else {
1879 /* Negating the result of the addition gives values on the range
1880 * [-2, 0]. Zero stays zero, and negative values become 1.0. This
1881 * is achieved using SLT.
1882 */
1883 st_src_reg slt_src = result_src;
1884 slt_src.negate = ~slt_src.negate;
1885 emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
1886 }
1887 }
1888 break;
1889 }
1890
1891 case ir_binop_logic_and:
1892 /* If native integers are disabled, the bool args are stored as float 0.0
1893 * or 1.0, so "mul" gives us "and". If they're enabled, just use the
1894 * actual AND opcode.
1895 */
1896 if (native_integers)
1897 emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
1898 else
1899 emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
1900 break;
1901
1902 case ir_binop_dot:
1903 assert(ir->operands[0]->type->is_vector());
1904 assert(ir->operands[0]->type == ir->operands[1]->type);
1905 emit_dp(ir, result_dst, op[0], op[1],
1906 ir->operands[0]->type->vector_elements);
1907 break;
1908
1909 case ir_unop_sqrt:
1910 if (have_sqrt) {
1911 emit_scalar(ir, TGSI_OPCODE_SQRT, result_dst, op[0]);
1912 } else {
1913 /* This is the only instruction sequence that makes the game "Risen"
1914 * render correctly. ABS is not required for the game, but since GLSL
1915 * declares negative values as "undefined", allowing us to do whatever
1916 * we want, I choose to use ABS to match DX9 and pre-GLSL RSQ
1917 * behavior.
1918 */
1919 emit_scalar(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
1920 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, result_src);
1921 emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, result_src);
1922 }
1923 break;
1924 case ir_unop_rsq:
1925 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
1926 break;
1927 case ir_unop_i2f:
1928 if (native_integers) {
1929 emit_asm(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
1930 break;
1931 }
1932 /* fallthrough to next case otherwise */
1933 case ir_unop_b2f:
1934 if (native_integers) {
1935 emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0));
1936 break;
1937 }
1938 /* fallthrough to next case otherwise */
1939 case ir_unop_i2u:
1940 case ir_unop_u2i:
1941 /* Converting between signed and unsigned integers is a no-op. */
1942 result_src = op[0];
1943 result_src.type = result_dst.type;
1944 break;
1945 case ir_unop_b2i:
1946 if (native_integers) {
1947 /* Booleans are stored as integers using ~0 for true and 0 for false.
1948 * GLSL requires that int(bool) return 1 for true and 0 for false.
1949 * This conversion is done with AND, but it could be done with NEG.
1950 */
1951 emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1));
1952 } else {
1953 /* Booleans and integers are both stored as floats when native
1954 * integers are disabled.
1955 */
1956 result_src = op[0];
1957 }
1958 break;
1959 case ir_unop_f2i:
1960 if (native_integers)
1961 emit_asm(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
1962 else
1963 emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
1964 break;
1965 case ir_unop_f2u:
1966 if (native_integers)
1967 emit_asm(ir, TGSI_OPCODE_F2U, result_dst, op[0]);
1968 else
1969 emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
1970 break;
1971 case ir_unop_bitcast_f2i:
1972 case ir_unop_bitcast_f2u:
1973 /* Make sure we don't propagate the negate modifier to integer opcodes. */
1974 if (op[0].negate)
1975 emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
1976 else
1977 result_src = op[0];
1978 result_src.type = ir->operation == ir_unop_bitcast_f2i ? GLSL_TYPE_INT :
1979 GLSL_TYPE_UINT;
1980 break;
1981 case ir_unop_bitcast_i2f:
1982 case ir_unop_bitcast_u2f:
1983 result_src = op[0];
1984 result_src.type = GLSL_TYPE_FLOAT;
1985 break;
1986 case ir_unop_f2b:
1987 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
1988 break;
1989 case ir_unop_d2b:
1990 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_double(0.0));
1991 break;
1992 case ir_unop_i2b:
1993 if (native_integers)
1994 emit_asm(ir, TGSI_OPCODE_USNE, result_dst, op[0], st_src_reg_for_int(0));
1995 else
1996 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
1997 break;
1998 case ir_unop_trunc:
1999 emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
2000 break;
2001 case ir_unop_ceil:
2002 emit_asm(ir, TGSI_OPCODE_CEIL, result_dst, op[0]);
2003 break;
2004 case ir_unop_floor:
2005 emit_asm(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
2006 break;
2007 case ir_unop_round_even:
2008 emit_asm(ir, TGSI_OPCODE_ROUND, result_dst, op[0]);
2009 break;
2010 case ir_unop_fract:
2011 emit_asm(ir, TGSI_OPCODE_FRC, result_dst, op[0]);
2012 break;
2013
2014 case ir_binop_min:
2015 emit_asm(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]);
2016 break;
2017 case ir_binop_max:
2018 emit_asm(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]);
2019 break;
2020 case ir_binop_pow:
2021 emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]);
2022 break;
2023
2024 case ir_unop_bit_not:
2025 if (native_integers) {
2026 emit_asm(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
2027 break;
2028 }
2029 case ir_unop_u2f:
2030 if (native_integers) {
2031 emit_asm(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
2032 break;
2033 }
2034 case ir_binop_lshift:
2035 if (native_integers) {
2036 emit_asm(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]);
2037 break;
2038 }
2039 case ir_binop_rshift:
2040 if (native_integers) {
2041 emit_asm(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]);
2042 break;
2043 }
2044 case ir_binop_bit_and:
2045 if (native_integers) {
2046 emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
2047 break;
2048 }
2049 case ir_binop_bit_xor:
2050 if (native_integers) {
2051 emit_asm(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
2052 break;
2053 }
2054 case ir_binop_bit_or:
2055 if (native_integers) {
2056 emit_asm(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
2057 break;
2058 }
2059
2060 assert(!"GLSL 1.30 features unsupported");
2061 break;
2062
2063 case ir_binop_ubo_load: {
2064 ir_constant *const_uniform_block = ir->operands[0]->as_constant();
2065 ir_constant *const_offset_ir = ir->operands[1]->as_constant();
2066 unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
2067 unsigned const_block = const_uniform_block ? const_uniform_block->value.u[0] + 1 : 0;
2068 st_src_reg index_reg = get_temp(glsl_type::uint_type);
2069 st_src_reg cbuf;
2070
2071 cbuf.type = ir->type->base_type;
2072 cbuf.file = PROGRAM_CONSTANT;
2073 cbuf.index = 0;
2074 cbuf.reladdr = NULL;
2075 cbuf.negate = 0;
2076
2077 assert(ir->type->is_vector() || ir->type->is_scalar());
2078
2079 if (const_offset_ir) {
2080 /* Constant index into constant buffer */
2081 cbuf.reladdr = NULL;
2082 cbuf.index = const_offset / 16;
2083 }
2084 else {
2085 ir_expression *offset_expr = ir->operands[1]->as_expression();
2086 st_src_reg offset = op[1];
2087
2088 /* The OpenGL spec is written in such a way that accesses with
2089 * non-constant offset are almost always vec4-aligned. The only
2090 * exception to this are members of structs in arrays of structs:
2091 * each struct in an array of structs is at least vec4-aligned,
2092 * but single-element and [ui]vec2 members of the struct may be at
2093 * an offset that is not a multiple of 16 bytes.
2094 *
2095 * Here, we extract that offset, relying on previous passes to always
2096 * generate offset expressions of the form (+ expr constant_offset).
2097 *
2098 * Note that the std430 layout, which allows more cases of alignment
2099 * less than vec4 in arrays, is not supported for uniform blocks, so
2100 * we do not have to deal with it here.
2101 */
2102 if (offset_expr && offset_expr->operation == ir_binop_add) {
2103 const_offset_ir = offset_expr->operands[1]->as_constant();
2104 if (const_offset_ir) {
2105 const_offset = const_offset_ir->value.u[0];
2106 cbuf.index = const_offset / 16;
2107 offset_expr->operands[0]->accept(this);
2108 offset = this->result;
2109 }
2110 }
2111
2112 /* Relative/variable index into constant buffer */
2113 emit_asm(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), offset,
2114 st_src_reg_for_int(4));
2115 cbuf.reladdr = ralloc(mem_ctx, st_src_reg);
2116 memcpy(cbuf.reladdr, &index_reg, sizeof(index_reg));
2117 }
2118
2119 if (const_uniform_block) {
2120 /* Constant constant buffer */
2121 cbuf.reladdr2 = NULL;
2122 cbuf.index2D = const_block;
2123 cbuf.has_index2 = true;
2124 }
2125 else {
2126 /* Relative/variable constant buffer */
2127 cbuf.reladdr2 = ralloc(mem_ctx, st_src_reg);
2128 cbuf.index2D = 1;
2129 memcpy(cbuf.reladdr2, &op[0], sizeof(st_src_reg));
2130 cbuf.has_index2 = true;
2131 }
2132
2133 cbuf.swizzle = swizzle_for_size(ir->type->vector_elements);
2134 if (glsl_base_type_is_64bit(cbuf.type))
2135 cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 8,
2136 const_offset % 16 / 8,
2137 const_offset % 16 / 8,
2138 const_offset % 16 / 8);
2139 else
2140 cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 4,
2141 const_offset % 16 / 4,
2142 const_offset % 16 / 4,
2143 const_offset % 16 / 4);
2144
2145 if (ir->type->base_type == GLSL_TYPE_BOOL) {
2146 emit_asm(ir, TGSI_OPCODE_USNE, result_dst, cbuf, st_src_reg_for_int(0));
2147 } else {
2148 emit_asm(ir, TGSI_OPCODE_MOV, result_dst, cbuf);
2149 }
2150 break;
2151 }
2152 case ir_triop_lrp:
2153 /* note: we have to reorder the three args here */
2154 emit_asm(ir, TGSI_OPCODE_LRP, result_dst, op[2], op[1], op[0]);
2155 break;
2156 case ir_triop_csel:
2157 if (this->ctx->Const.NativeIntegers)
2158 emit_asm(ir, TGSI_OPCODE_UCMP, result_dst, op[0], op[1], op[2]);
2159 else {
2160 op[0].negate = ~op[0].negate;
2161 emit_asm(ir, TGSI_OPCODE_CMP, result_dst, op[0], op[1], op[2]);
2162 }
2163 break;
2164 case ir_triop_bitfield_extract:
2165 emit_asm(ir, TGSI_OPCODE_IBFE, result_dst, op[0], op[1], op[2]);
2166 break;
2167 case ir_quadop_bitfield_insert:
2168 emit_asm(ir, TGSI_OPCODE_BFI, result_dst, op[0], op[1], op[2], op[3]);
2169 break;
2170 case ir_unop_bitfield_reverse:
2171 emit_asm(ir, TGSI_OPCODE_BREV, result_dst, op[0]);
2172 break;
2173 case ir_unop_bit_count:
2174 emit_asm(ir, TGSI_OPCODE_POPC, result_dst, op[0]);
2175 break;
2176 case ir_unop_find_msb:
2177 emit_asm(ir, TGSI_OPCODE_IMSB, result_dst, op[0]);
2178 break;
2179 case ir_unop_find_lsb:
2180 emit_asm(ir, TGSI_OPCODE_LSB, result_dst, op[0]);
2181 break;
2182 case ir_binop_imul_high:
2183 emit_asm(ir, TGSI_OPCODE_IMUL_HI, result_dst, op[0], op[1]);
2184 break;
2185 case ir_triop_fma:
2186 /* In theory, MAD is incorrect here. */
2187 if (have_fma)
2188 emit_asm(ir, TGSI_OPCODE_FMA, result_dst, op[0], op[1], op[2]);
2189 else
2190 emit_asm(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]);
2191 break;
2192 case ir_unop_interpolate_at_centroid:
2193 emit_asm(ir, TGSI_OPCODE_INTERP_CENTROID, result_dst, op[0]);
2194 break;
2195 case ir_binop_interpolate_at_offset: {
2196 /* The y coordinate needs to be flipped for the default fb */
2197 static const gl_state_index transform_y_state[STATE_LENGTH]
2198 = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM };
2199
2200 unsigned transform_y_index =
2201 _mesa_add_state_reference(this->prog->Parameters,
2202 transform_y_state);
2203
2204 st_src_reg transform_y = st_src_reg(PROGRAM_STATE_VAR,
2205 transform_y_index,
2206 glsl_type::vec4_type);
2207 transform_y.swizzle = SWIZZLE_XXXX;
2208
2209 st_src_reg temp = get_temp(glsl_type::vec2_type);
2210 st_dst_reg temp_dst = st_dst_reg(temp);
2211
2212 emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[1]);
2213 temp_dst.writemask = WRITEMASK_Y;
2214 emit_asm(ir, TGSI_OPCODE_MUL, temp_dst, transform_y, op[1]);
2215 emit_asm(ir, TGSI_OPCODE_INTERP_OFFSET, result_dst, op[0], temp);
2216 break;
2217 }
2218 case ir_binop_interpolate_at_sample:
2219 emit_asm(ir, TGSI_OPCODE_INTERP_SAMPLE, result_dst, op[0], op[1]);
2220 break;
2221
2222 case ir_unop_d2f:
2223 emit_asm(ir, TGSI_OPCODE_D2F, result_dst, op[0]);
2224 break;
2225 case ir_unop_f2d:
2226 emit_asm(ir, TGSI_OPCODE_F2D, result_dst, op[0]);
2227 break;
2228 case ir_unop_d2i:
2229 emit_asm(ir, TGSI_OPCODE_D2I, result_dst, op[0]);
2230 break;
2231 case ir_unop_i2d:
2232 emit_asm(ir, TGSI_OPCODE_I2D, result_dst, op[0]);
2233 break;
2234 case ir_unop_d2u:
2235 emit_asm(ir, TGSI_OPCODE_D2U, result_dst, op[0]);
2236 break;
2237 case ir_unop_u2d:
2238 emit_asm(ir, TGSI_OPCODE_U2D, result_dst, op[0]);
2239 break;
2240 case ir_unop_unpack_double_2x32:
2241 case ir_unop_pack_double_2x32:
2242 emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
2243 break;
2244
2245 case ir_binop_ldexp:
2246 if (ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE) {
2247 emit_asm(ir, TGSI_OPCODE_DLDEXP, result_dst, op[0], op[1]);
2248 } else {
2249 assert(!"Invalid ldexp for non-double opcode in glsl_to_tgsi_visitor::visit()");
2250 }
2251 break;
2252
2253 case ir_unop_pack_half_2x16:
2254 emit_asm(ir, TGSI_OPCODE_PK2H, result_dst, op[0]);
2255 break;
2256 case ir_unop_unpack_half_2x16:
2257 emit_asm(ir, TGSI_OPCODE_UP2H, result_dst, op[0]);
2258 break;
2259
2260 case ir_unop_get_buffer_size: {
2261 ir_constant *const_offset = ir->operands[0]->as_constant();
2262 st_src_reg buffer(
2263 PROGRAM_BUFFER,
2264 ctx->Const.Program[shader->Stage].MaxAtomicBuffers +
2265 (const_offset ? const_offset->value.u[0] : 0),
2266 GLSL_TYPE_UINT);
2267 if (!const_offset) {
2268 buffer.reladdr = ralloc(mem_ctx, st_src_reg);
2269 *buffer.reladdr = op[0];
2270 emit_arl(ir, sampler_reladdr, op[0]);
2271 }
2272 emit_asm(ir, TGSI_OPCODE_RESQ, result_dst)->resource = buffer;
2273 break;
2274 }
2275
2276 case ir_unop_vote_any:
2277 emit_asm(ir, TGSI_OPCODE_VOTE_ANY, result_dst, op[0]);
2278 break;
2279 case ir_unop_vote_all:
2280 emit_asm(ir, TGSI_OPCODE_VOTE_ALL, result_dst, op[0]);
2281 break;
2282 case ir_unop_vote_eq:
2283 emit_asm(ir, TGSI_OPCODE_VOTE_EQ, result_dst, op[0]);
2284 break;
2285
2286 case ir_unop_pack_snorm_2x16:
2287 case ir_unop_pack_unorm_2x16:
2288 case ir_unop_pack_snorm_4x8:
2289 case ir_unop_pack_unorm_4x8:
2290
2291 case ir_unop_unpack_snorm_2x16:
2292 case ir_unop_unpack_unorm_2x16:
2293 case ir_unop_unpack_snorm_4x8:
2294 case ir_unop_unpack_unorm_4x8:
2295
2296 case ir_quadop_vector:
2297 case ir_binop_vector_extract:
2298 case ir_triop_vector_insert:
2299 case ir_binop_carry:
2300 case ir_binop_borrow:
2301 case ir_unop_ssbo_unsized_array_length:
2302 /* This operation is not supported, or should have already been handled.
2303 */
2304 assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()");
2305 break;
2306 }
2307
2308 this->result = result_src;
2309 }
2310
2311
2312 void
2313 glsl_to_tgsi_visitor::visit(ir_swizzle *ir)
2314 {
2315 st_src_reg src;
2316 int i;
2317 int swizzle[4];
2318
2319 /* Note that this is only swizzles in expressions, not those on the left
2320 * hand side of an assignment, which do write masking. See ir_assignment
2321 * for that.
2322 */
2323
2324 ir->val->accept(this);
2325 src = this->result;
2326 assert(src.file != PROGRAM_UNDEFINED);
2327 assert(ir->type->vector_elements > 0);
2328
2329 for (i = 0; i < 4; i++) {
2330 if (i < ir->type->vector_elements) {
2331 switch (i) {
2332 case 0:
2333 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
2334 break;
2335 case 1:
2336 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
2337 break;
2338 case 2:
2339 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
2340 break;
2341 case 3:
2342 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
2343 break;
2344 }
2345 } else {
2346 /* If the type is smaller than a vec4, replicate the last
2347 * channel out.
2348 */
2349 swizzle[i] = swizzle[ir->type->vector_elements - 1];
2350 }
2351 }
2352
2353 src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
2354
2355 this->result = src;
2356 }
2357
2358 /* Test if the variable is an array. Note that geometry and
2359 * tessellation shader inputs are outputs are always arrays (except
2360 * for patch inputs), so only the array element type is considered.
2361 */
2362 static bool
2363 is_inout_array(unsigned stage, ir_variable *var, bool *remove_array)
2364 {
2365 const glsl_type *type = var->type;
2366
2367 *remove_array = false;
2368
2369 if ((stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_in) ||
2370 (stage == MESA_SHADER_FRAGMENT && var->data.mode == ir_var_shader_out))
2371 return false;
2372
2373 if (((stage == MESA_SHADER_GEOMETRY && var->data.mode == ir_var_shader_in) ||
2374 (stage == MESA_SHADER_TESS_EVAL && var->data.mode == ir_var_shader_in) ||
2375 stage == MESA_SHADER_TESS_CTRL) &&
2376 !var->data.patch) {
2377 if (!var->type->is_array())
2378 return false; /* a system value probably */
2379
2380 type = var->type->fields.array;
2381 *remove_array = true;
2382 }
2383
2384 return type->is_array() || type->is_matrix();
2385 }
2386
2387 static unsigned
2388 st_translate_interp_loc(ir_variable *var)
2389 {
2390 if (var->data.centroid)
2391 return TGSI_INTERPOLATE_LOC_CENTROID;
2392 else if (var->data.sample)
2393 return TGSI_INTERPOLATE_LOC_SAMPLE;
2394 else
2395 return TGSI_INTERPOLATE_LOC_CENTER;
2396 }
2397
/* Resolve a variable dereference to an st_src_reg in this->result.
 * On first use of a variable, create its backing variable_storage entry
 * (and, for shader inputs/outputs, fill in an inout_decl slot).
 */
void
glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
{
   variable_storage *entry = find_variable_storage(ir->var);
   ir_variable *var = ir->var;
   bool remove_array;

   if (!entry) {
      switch (var->data.mode) {
      case ir_var_uniform:
         entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
                                               var->data.param_index);
         this->variables.push_tail(entry);
         break;
      case ir_var_shader_in: {
         /* The linker assigns locations for varyings and attributes,
          * including deprecated builtins (like gl_Color), user-assigned
          * generic attributes (glBindVertexLocation), and
          * user-defined varyings.
          */
         assert(var->data.location != -1);

         const glsl_type *type_without_array = var->type->without_array();
         struct inout_decl *decl = &inputs[num_inputs];
         unsigned component = var->data.location_frac;
         unsigned num_components;
         num_inputs++;

         /* 64-bit components occupy two 32-bit channels each, so the
          * starting component is halved.
          */
         if (type_without_array->is_64bit())
            component = component / 2;
         if (type_without_array->vector_elements)
            num_components = type_without_array->vector_elements;
         else
            num_components = 4; /* struct/opaque element: assume full vec4 */

         decl->mesa_index = var->data.location;
         decl->interp = (glsl_interp_mode) var->data.interpolation;
         decl->interp_loc = st_translate_interp_loc(var);
         decl->base_type = type_without_array->base_type;
         decl->usage_mask = u_bit_consecutive(component, num_components);

         /* Array inputs get a non-zero array_id; ids are 1-based. */
         if (is_inout_array(shader->Stage, var, &remove_array)) {
            decl->array_id = num_input_arrays + 1;
            num_input_arrays++;
         } else {
            decl->array_id = 0;
         }

         /* When the implicit per-vertex outer array was stripped, size
          * is that of the element type.
          */
         if (remove_array)
            decl->size = type_size(var->type->fields.array);
         else
            decl->size = type_size(var->type);

         entry = new(mem_ctx) variable_storage(var,
                                               PROGRAM_INPUT,
                                               decl->mesa_index,
                                               decl->array_id);
         entry->component = component;

         this->variables.push_tail(entry);
         break;
      }
      case ir_var_shader_out: {
         assert(var->data.location != -1);

         const glsl_type *type_without_array = var->type->without_array();
         struct inout_decl *decl = &outputs[num_outputs];
         unsigned component = var->data.location_frac;
         unsigned num_components;
         num_outputs++;

         /* See the shader_in case: 64-bit types use two channels per
          * component.
          */
         if (type_without_array->is_64bit())
            component = component / 2;
         if (type_without_array->vector_elements)
            num_components = type_without_array->vector_elements;
         else
            num_components = 4;

         /* data.index selects the secondary color output for
          * dual-source blending; fold it into the output index.
          */
         decl->mesa_index = var->data.location + FRAG_RESULT_MAX * var->data.index;
         decl->base_type = type_without_array->base_type;
         decl->usage_mask = u_bit_consecutive(component, num_components);

         if (is_inout_array(shader->Stage, var, &remove_array)) {
            decl->array_id = num_output_arrays + 1;
            num_output_arrays++;
         } else {
            decl->array_id = 0;
         }

         if (remove_array)
            decl->size = type_size(var->type->fields.array);
         else
            decl->size = type_size(var->type);

         entry = new(mem_ctx) variable_storage(var,
                                               PROGRAM_OUTPUT,
                                               decl->mesa_index,
                                               decl->array_id);
         entry->component = component;

         this->variables.push_tail(entry);
         break;
      }
      case ir_var_system_value:
         /* Note: intentionally not pushed onto this->variables. */
         entry = new(mem_ctx) variable_storage(var,
                                               PROGRAM_SYSTEM_VALUE,
                                               var->data.location);
         break;
      case ir_var_auto:
      case ir_var_temporary:
         /* Locals and compiler temporaries live in a fresh temp register. */
         st_src_reg src = get_temp(var->type);

         entry = new(mem_ctx) variable_storage(var, src.file, src.index,
                                               src.array_id);
         this->variables.push_tail(entry);

         break;
      }

      if (!entry) {
         printf("Failed to make storage for %s\n", var->name);
         exit(1);
      }
   }

   this->result = st_src_reg(entry->file, entry->index, var->type,
                             entry->component, entry->array_id);
   /* Double VS inputs read both halves from a single attribute slot. */
   if (this->shader->Stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_in && var->type->is_double())
      this->result.is_double_vertex_input = true;
   /* Without native integer support everything is represented as floats. */
   if (!native_integers)
      this->result.type = GLSL_TYPE_FLOAT;
}
2530
/* Trim unused leading/trailing elements off in/out array declarations and
 * then mark the remaining range as used so later input/output mapping sees
 * a consistent picture. Patch varyings are tracked in patch_usage_mask,
 * everything else in usage_mask/double_usage_mask.
 */
static void
shrink_array_declarations(struct inout_decl *decls, unsigned count,
                          GLbitfield64* usage_mask,
                          GLbitfield64 double_usage_mask,
                          GLbitfield* patch_usage_mask)
{
   unsigned i;
   int j;

   /* Fix array declarations by removing unused array elements at both ends
    * of the arrays. For example, mat4[3] where only mat[1] is used.
    */
   for (i = 0; i < count; i++) {
      struct inout_decl *decl = &decls[i];
      if (!decl->array_id)
         continue;

      /* Shrink the beginning. */
      for (j = 0; j < (int)decl->size; j++) {
         if (decl->mesa_index >= VARYING_SLOT_PATCH0) {
            if (*patch_usage_mask &
                BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j))
               break;
         }
         else {
            if (*usage_mask & BITFIELD64_BIT(decl->mesa_index+j))
               break;
            /* NOTE(review): the j-1 check appears to keep the slot when it
             * is the second half of a dual-slot double starting one slot
             * earlier — confirm against how double_usage_mask is filled.
             */
            if (double_usage_mask & BITFIELD64_BIT(decl->mesa_index+j-1))
               break;
         }

         /* Drop this leading element; j-- keeps j at 0 since the window
          * slid forward by one.
          */
         decl->mesa_index++;
         decl->size--;
         j--;
      }

      /* Shrink the end. */
      for (j = decl->size-1; j >= 0; j--) {
         if (decl->mesa_index >= VARYING_SLOT_PATCH0) {
            if (*patch_usage_mask &
                BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j))
               break;
         }
         else {
            if (*usage_mask & BITFIELD64_BIT(decl->mesa_index+j))
               break;
            if (double_usage_mask & BITFIELD64_BIT(decl->mesa_index+j-1))
               break;
         }

         decl->size--;
      }

      /* When not all entries of an array are accessed, we mark them as used
       * here anyway, to ensure that the input/output mapping logic doesn't get
       * confused.
       *
       * TODO This happens when an array isn't used via indirect access, which
       * some game ports do (at least eON-based). There is an optimization
       * opportunity here by replacing the array declaration with non-array
       * declarations of those slots that are actually used.
       */
      for (j = 1; j < (int)decl->size; ++j) {
         if (decl->mesa_index >= VARYING_SLOT_PATCH0)
            *patch_usage_mask |= BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j);
         else
            *usage_mask |= BITFIELD64_BIT(decl->mesa_index + j);
      }
   }
}
2601
/* Resolve an array dereference: apply either a constant register offset or
 * build a relative-address (reladdr) chain on top of the base register.
 * For per-vertex GS/tessellation in/outs the outermost index selects the
 * second dimension (index2D/reladdr2) instead.
 */
void
glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
{
   ir_constant *index;
   st_src_reg src;
   int element_size = type_size(ir->type);
   bool is_2D = false;

   index = ir->array_index->constant_expression_value();

   ir->array->accept(this);
   src = this->result;

   /* Only the outermost dereference can address the vertex dimension. */
   if (ir->array->ir_type != ir_type_dereference_array) {
      switch (this->prog->Target) {
      case GL_TESS_CONTROL_PROGRAM_NV:
         is_2D = (src.file == PROGRAM_INPUT || src.file == PROGRAM_OUTPUT) &&
                 !ir->variable_referenced()->data.patch;
         break;
      case GL_TESS_EVALUATION_PROGRAM_NV:
         is_2D = src.file == PROGRAM_INPUT &&
                 !ir->variable_referenced()->data.patch;
         break;
      case GL_GEOMETRY_PROGRAM_NV:
         is_2D = src.file == PROGRAM_INPUT;
         break;
      }
   }

   /* The vertex dimension steps one slot per vertex. */
   if (is_2D)
      element_size = 1;

   if (index) {

      /* Legacy VP attributes may span a different number of slots. */
      if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
         src.file == PROGRAM_INPUT)
         element_size = attrib_type_size(ir->type, true);
      if (is_2D) {
         src.index2D = index->value.i[0];
         src.has_index2 = true;
      } else
         src.index += index->value.i[0] * element_size;
   } else {
      /* Variable index array dereference. It eats the "vec4" of the
       * base of the array and an index that offsets the TGSI register
       * index.
       */
      ir->array_index->accept(this);

      st_src_reg index_reg;

      if (element_size == 1) {
         index_reg = this->result;
      } else {
         /* Scale the index by the element size in registers. */
         index_reg = get_temp(native_integers ?
                              glsl_type::int_type : glsl_type::float_type);

         emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg),
              this->result, st_src_reg_for_type(index_reg.type, element_size));
      }

      /* If there was already a relative address register involved, add the
       * new and the old together to get the new offset.
       */
      if (!is_2D && src.reladdr != NULL) {
         st_src_reg accum_reg = get_temp(native_integers ?
                                glsl_type::int_type : glsl_type::float_type);

         emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg),
              index_reg, *src.reladdr);

         index_reg = accum_reg;
      }

      if (is_2D) {
         src.reladdr2 = ralloc(mem_ctx, st_src_reg);
         memcpy(src.reladdr2, &index_reg, sizeof(index_reg));
         src.index2D = 0;
         src.has_index2 = true;
      } else {
         src.reladdr = ralloc(mem_ctx, st_src_reg);
         memcpy(src.reladdr, &index_reg, sizeof(index_reg));
      }
   }

   /* Change the register type to the element type of the array. */
   src.type = ir->type->base_type;

   this->result = src;
}
2692
2693 void
2694 glsl_to_tgsi_visitor::visit(ir_dereference_record *ir)
2695 {
2696 unsigned int i;
2697 const glsl_type *struct_type = ir->record->type;
2698 int offset = 0;
2699
2700 ir->record->accept(this);
2701
2702 for (i = 0; i < struct_type->length; i++) {
2703 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
2704 break;
2705 offset += type_size(struct_type->fields.structure[i].type);
2706 }
2707
2708 /* If the type is smaller than a vec4, replicate the last channel out. */
2709 if (ir->type->is_scalar() || ir->type->is_vector())
2710 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
2711 else
2712 this->result.swizzle = SWIZZLE_NOOP;
2713
2714 this->result.index += offset;
2715 this->result.type = ir->type->base_type;
2716 }
2717
2718 /**
2719 * We want to be careful in assignment setup to hit the actual storage
2720 * instead of potentially using a temporary like we might with the
2721 * ir_dereference handler.
2722 */
2723 static st_dst_reg
2724 get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v, int *component)
2725 {
2726 /* The LHS must be a dereference. If the LHS is a variable indexed array
2727 * access of a vector, it must be separated into a series conditional moves
2728 * before reaching this point (see ir_vec_index_to_cond_assign).
2729 */
2730 assert(ir->as_dereference());
2731 ir_dereference_array *deref_array = ir->as_dereference_array();
2732 if (deref_array) {
2733 assert(!deref_array->array->type->is_vector());
2734 }
2735
2736 /* Use the rvalue deref handler for the most part. We write swizzles using
2737 * the writemask, but we do extract the base component for enhanced layouts
2738 * from the source swizzle.
2739 */
2740 ir->accept(v);
2741 *component = GET_SWZ(v->result.swizzle, 0);
2742 return st_dst_reg(v->result);
2743 }
2744
/**
 * Process the condition of a conditional assignment
 *
 * Examines the condition of a conditional assignment to generate the optimal
 * first operand of a \c CMP instruction. If the condition is a relational
 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
 * used as the source for the \c CMP instruction. Otherwise the comparison
 * is processed to a boolean result, and the boolean result is used as the
 * operand to the CMP instruction.
 *
 * Returns true when the caller must swap the order of the CMP's second and
 * third operands. Leaves the condition value in this->result.
 */
bool
glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir)
{
   ir_rvalue *src_ir = ir;
   bool negate = true;
   bool switch_order = false;

   ir_expression *const expr = ir->as_expression();

   if (native_integers) {
      /* With native integers the condition is used with UCMP (nonzero
       * selects), so only ==0 / !=0 against integer/bool operands can be
       * folded; ==0 inverts the selection, hence switch_order.
       */
      if ((expr != NULL) && (expr->get_num_operands() == 2)) {
         enum glsl_base_type type = expr->operands[0]->type->base_type;
         if (type == GLSL_TYPE_INT || type == GLSL_TYPE_UINT ||
             type == GLSL_TYPE_BOOL) {
            if (expr->operation == ir_binop_equal) {
               if (expr->operands[0]->is_zero()) {
                  src_ir = expr->operands[1];
                  switch_order = true;
               }
               else if (expr->operands[1]->is_zero()) {
                  src_ir = expr->operands[0];
                  switch_order = true;
               }
            }
            else if (expr->operation == ir_binop_nequal) {
               if (expr->operands[0]->is_zero()) {
                  src_ir = expr->operands[1];
               }
               else if (expr->operands[1]->is_zero()) {
                  src_ir = expr->operands[0];
               }
            }
         }
      }

      src_ir->accept(this);
      return switch_order;
   }

   if ((expr != NULL) && (expr->get_num_operands() == 2)) {
      bool zero_on_left = false;

      if (expr->operands[0]->is_zero()) {
         src_ir = expr->operands[1];
         zero_on_left = true;
      } else if (expr->operands[1]->is_zero()) {
         src_ir = expr->operands[0];
         zero_on_left = false;
      }

      /* a is          -  0  +                -  0  +
       * (a <  0)      T  F  F    ( a < 0)    T  F  F
       * (0 <  a)      F  F  T    (-a < 0)    F  F  T
       * (a <= 0)      T  T  F    (-a < 0)    F  F  T  (swap order of other operands)
       * (0 <= a)      F  T  T    ( a < 0)    T  F  F  (swap order of other operands)
       * (a >  0)      F  F  T    (-a < 0)    F  F  T
       * (0 >  a)      T  F  F    ( a < 0)    T  F  F
       * (a >= 0)      F  T  T    ( a < 0)    T  F  F  (swap order of other operands)
       * (0 >= a)      T  T  F    (-a < 0)    F  F  T  (swap order of other operands)
       *
       * Note that exchanging the order of 0 and 'a' in the comparison simply
       * means that the value of 'a' should be negated.
       */
      if (src_ir != ir) {
         switch (expr->operation) {
         case ir_binop_less:
            switch_order = false;
            negate = zero_on_left;
            break;

         case ir_binop_greater:
            switch_order = false;
            negate = !zero_on_left;
            break;

         case ir_binop_lequal:
            switch_order = true;
            negate = !zero_on_left;
            break;

         case ir_binop_gequal:
            switch_order = true;
            negate = zero_on_left;
            break;

         default:
            /* This isn't the right kind of comparison after all, so make sure
             * the whole condition is visited.
             */
            src_ir = ir;
            break;
         }
      }
   }

   src_ir->accept(this);

   /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
    * condition we produced is 0.0 or 1.0. By flipping the sign, we can
    * choose which value TGSI_OPCODE_CMP produces without an extra instruction
    * computing the condition.
    */
   if (negate)
      this->result.negate = ~this->result.negate;

   return switch_order;
}
2862
/* Recursively emit the moves for an (optionally conditional) assignment of
 * a possibly aggregate value: structs, arrays, and matrices are decomposed
 * into scalar/vector moves. l and r are advanced in place, one register
 * slot per emitted move, so recursion walks the aggregate in order.
 * When cond is non-NULL a CMP/UCMP select is emitted instead of a MOV;
 * cond_swap flips which side of the select is the new value.
 */
void
glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type *type,
                                     st_dst_reg *l, st_src_reg *r,
                                     st_src_reg *cond, bool cond_swap)
{
   if (type->base_type == GLSL_TYPE_STRUCT) {
      for (unsigned int i = 0; i < type->length; i++) {
         emit_block_mov(ir, type->fields.structure[i].type, l, r,
                        cond, cond_swap);
      }
      return;
   }

   if (type->is_array()) {
      for (unsigned int i = 0; i < type->length; i++) {
         emit_block_mov(ir, type->fields.array, l, r, cond, cond_swap);
      }
      return;
   }

   if (type->is_matrix()) {
      const struct glsl_type *vec_type;

      /* A matrix is moved one column vector at a time. */
      vec_type = glsl_type::get_instance(type->is_double() ? GLSL_TYPE_DOUBLE : GLSL_TYPE_FLOAT,
                                         type->vector_elements, 1);

      for (int i = 0; i < type->matrix_columns; i++) {
         emit_block_mov(ir, vec_type, l, r, cond, cond_swap);
      }
      return;
   }

   /* Base case: scalar or vector. */
   assert(type->is_scalar() || type->is_vector());

   l->type = type->base_type;
   r->type = type->base_type;
   if (cond) {
      /* Conditional move: select between the new value and the current
       * destination contents, read back through l_src.
       */
      st_src_reg l_src = st_src_reg(*l);
      l_src.swizzle = swizzle_for_size(type->vector_elements);

      if (native_integers) {
         emit_asm(ir, TGSI_OPCODE_UCMP, *l, *cond,
                  cond_swap ? l_src : *r,
                  cond_swap ? *r : l_src);
      } else {
         emit_asm(ir, TGSI_OPCODE_CMP, *l, *cond,
                  cond_swap ? l_src : *r,
                  cond_swap ? *r : l_src);
      }
   } else {
      emit_asm(ir, TGSI_OPCODE_MOV, *l, *r);
   }
   l->index++;
   r->index++;
   /* Dual-slot (64-bit) values occupy two registers on the destination;
    * double VS inputs are packed into a single attribute slot, so the
    * source does not advance twice for those.
    */
   if (type->is_dual_slot()) {
      l->index++;
      if (r->is_double_vertex_input == false)
         r->index++;
   }
}
2923
/* Translate an assignment: compute the RHS, resolve the LHS storage and
 * writemask, fix up the RHS swizzle to match the written channels, then
 * emit either a conditional move, a reuse of the just-emitted expression
 * instruction, or a plain block move.
 */
void
glsl_to_tgsi_visitor::visit(ir_assignment *ir)
{
   int dst_component;
   st_dst_reg l;
   st_src_reg r;

   ir->rhs->accept(this);
   r = this->result;

   l = get_assignment_lhs(ir->lhs, this, &dst_component);

   {
      int swizzles[4];
      int first_enabled_chan = 0;
      int rhs_chan = 0;
      ir_variable *variable = ir->lhs->variable_referenced();

      /* gl_FragDepth/gl_FragStencil are scalars in GLSL but live in fixed
       * channels (Z and Y respectively) of the fragment result register.
       */
      if (shader->Stage == MESA_SHADER_FRAGMENT &&
          variable->data.mode == ir_var_shader_out &&
          (variable->data.location == FRAG_RESULT_DEPTH ||
           variable->data.location == FRAG_RESULT_STENCIL)) {
         assert(ir->lhs->type->is_scalar());
         assert(ir->write_mask == WRITEMASK_X);

         if (variable->data.location == FRAG_RESULT_DEPTH)
            l.writemask = WRITEMASK_Z;
         else {
            assert(variable->data.location == FRAG_RESULT_STENCIL);
            l.writemask = WRITEMASK_Y;
         }
      } else if (ir->write_mask == 0) {
         /* write_mask == 0 means "whole object" for non-scalar/vector LHS. */
         assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());

         unsigned num_elements = ir->lhs->type->without_array()->vector_elements;

         if (num_elements) {
            l.writemask = u_bit_consecutive(0, num_elements);
         } else {
            /* The type is a struct or an array of (array of) structs. */
            l.writemask = WRITEMASK_XYZW;
         }
      } else {
         l.writemask = ir->write_mask;
      }

      /* Remember the source channel of the first written component, used
       * below to fill the unwritten swizzle slots.
       */
      for (int i = 0; i < 4; i++) {
         if (l.writemask & (1 << i)) {
            first_enabled_chan = GET_SWZ(r.swizzle, i);
            break;
         }
      }

      /* Shift for the base component from enhanced layouts. */
      l.writemask = l.writemask << dst_component;

      /* Swizzle a small RHS vector into the channels being written.
       *
       * glsl ir treats write_mask as dictating how many channels are
       * present on the RHS while TGSI treats write_mask as just
       * showing which channels of the vec4 RHS get written.
       */
      for (int i = 0; i < 4; i++) {
         if (l.writemask & (1 << i))
            swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
         else
            swizzles[i] = first_enabled_chan;
      }
      r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
                                swizzles[2], swizzles[3]);
   }

   assert(l.file != PROGRAM_UNDEFINED);
   assert(r.file != PROGRAM_UNDEFINED);

   if (ir->condition) {
      const bool switch_order = this->process_move_condition(ir->condition);
      st_src_reg condition = this->result;

      emit_block_mov(ir, ir->lhs->type, &l, &r, &condition, switch_order);
   } else if (ir->rhs->as_expression() &&
              this->instructions.get_tail() &&
              ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir &&
              !((glsl_to_tgsi_instruction *)this->instructions.get_tail())->is_64bit_expanded &&
              type_size(ir->lhs->type) == 1 &&
              l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst[0].writemask) {
      /* To avoid emitting an extra MOV when assigning an expression to a
       * variable, emit the last instruction of the expression again, but
       * replace the destination register with the target of the assignment.
       * Dead code elimination will remove the original instruction.
       */
      glsl_to_tgsi_instruction *inst, *new_inst;
      inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
      new_inst = emit_asm(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2], inst->src[3]);
      new_inst->saturate = inst->saturate;
      inst->dead_mask = inst->dst[0].writemask;
   } else {
      emit_block_mov(ir, ir->rhs->type, &l, &r, NULL, false);
   }
}
3023
3024
/**
 * Translate a GLSL IR constant into a TGSI source register.
 *
 * Scalars/vectors become entries in the immediate file (or the constant
 * file when nested inside an array constant); structs, arrays and matrices
 * are expanded into a temporary, one MOV per element/column.  The resulting
 * register is left in this->result.
 */
void
glsl_to_tgsi_visitor::visit(ir_constant *ir)
{
   st_src_reg src;
   /* Scratch space for the packed constant values; doubles need the full
    * 4 * sizeof(GLdouble) when packing two doubles per vec4 below.
    */
   GLdouble stack_vals[4] = { 0 };
   gl_constant_value *values = (gl_constant_value *) stack_vals;
   GLenum gl_type = GL_NONE;
   unsigned int i;
   /* Depth counter for nested array-constant expansion.  While inside an
    * array we must place element values in PROGRAM_CONSTANT instead of
    * PROGRAM_IMMEDIATE (static: shared across the recursive accept() calls
    * this visitor makes on array elements).
    */
   static int in_array = 0;
   gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE;

   /* Unfortunately, 4 floats is all we can get into
    * _mesa_add_typed_unnamed_constant. So, make a temp to store an
    * aggregate constant and move each constant value into it. If we
    * get lucky, copy propagation will eliminate the extra moves.
    */
   if (ir->type->base_type == GLSL_TYPE_STRUCT) {
      st_src_reg temp_base = get_temp(ir->type);
      st_dst_reg temp = st_dst_reg(temp_base);

      /* Visit each field constant and copy it, register by register, into
       * consecutive slots of the temporary.
       */
      foreach_in_list(ir_constant, field_value, &ir->components) {
         int size = type_size(field_value->type);

         assert(size > 0);

         field_value->accept(this);
         src = this->result;

         for (i = 0; i < (unsigned int)size; i++) {
            emit_asm(ir, TGSI_OPCODE_MOV, temp, src);

            src.index++;
            temp.index++;
         }
      }
      this->result = temp_base;
      return;
   }

   if (ir->type->is_array()) {
      st_src_reg temp_base = get_temp(ir->type);
      st_dst_reg temp = st_dst_reg(temp_base);
      int size = type_size(ir->type->fields.array);

      assert(size > 0);
      /* Element constants visited below must land in the constant file;
       * see the in_array/file logic at the top of this function.
       */
      in_array++;

      for (i = 0; i < ir->type->length; i++) {
         ir->array_elements[i]->accept(this);
         src = this->result;
         for (int j = 0; j < size; j++) {
            emit_asm(ir, TGSI_OPCODE_MOV, temp, src);

            src.index++;
            temp.index++;
         }
      }
      this->result = temp_base;
      in_array--;
      return;
   }

   if (ir->type->is_matrix()) {
      st_src_reg mat = get_temp(ir->type);
      st_dst_reg mat_column = st_dst_reg(mat);

      /* Emit one constant + MOV per column (float), or per half-column for
       * doubles, which occupy two channels each.
       */
      for (i = 0; i < ir->type->matrix_columns; i++) {
         switch (ir->type->base_type) {
         case GLSL_TYPE_FLOAT:
            values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];

            src = st_src_reg(file, -1, ir->type->base_type);
            src.index = add_constant(file,
                                     values,
                                     ir->type->vector_elements,
                                     GL_FLOAT,
                                     &src.swizzle);
            emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
            break;
         case GLSL_TYPE_DOUBLE:
            values = (gl_constant_value *) &ir->value.d[i * ir->type->vector_elements];
            src = st_src_reg(file, -1, ir->type->base_type);
            src.index = add_constant(file,
                                     values,
                                     ir->type->vector_elements,
                                     GL_DOUBLE,
                                     &src.swizzle);
            /* Two doubles fit in one vec4, so a dvec2 half of the column
             * goes into .xy; a lone double goes into .x.
             */
            if (ir->type->vector_elements >= 2) {
               mat_column.writemask = WRITEMASK_XY;
               src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
               emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
            } else {
               mat_column.writemask = WRITEMASK_X;
               src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
               emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
            }
            src.index++;
            /* Columns with more than two doubles spill into a second
             * register: .zw for dvec4, .z for dvec3.
             */
            if (ir->type->vector_elements > 2) {
               if (ir->type->vector_elements == 4) {
                  mat_column.writemask = WRITEMASK_ZW;
                  src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
                  emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
               } else {
                  mat_column.writemask = WRITEMASK_Z;
                  src.swizzle = MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y);
                  emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
                  mat_column.writemask = WRITEMASK_XYZW;
                  src.swizzle = SWIZZLE_XYZW;
               }
               mat_column.index++;
            }
            break;
         default:
            unreachable("Illegal matrix constant type.\n");
            break;
         }
         mat_column.index++;
      }
      this->result = mat;
      return;
   }

   /* Scalar/vector case: pack the values into the stack buffer in the
    * layout add_constant() expects for the given GL type.
    */
   switch (ir->type->base_type) {
   case GLSL_TYPE_FLOAT:
      gl_type = GL_FLOAT;
      for (i = 0; i < ir->type->vector_elements; i++) {
         values[i].f = ir->value.f[i];
      }
      break;
   case GLSL_TYPE_DOUBLE:
      gl_type = GL_DOUBLE;
      /* Each double spans two gl_constant_value slots. */
      for (i = 0; i < ir->type->vector_elements; i++) {
         memcpy(&values[i * 2], &ir->value.d[i], sizeof(double));
      }
      break;
   case GLSL_TYPE_UINT:
      /* Without native integer support, integral constants are stored as
       * their float equivalents.
       */
      gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT;
      for (i = 0; i < ir->type->vector_elements; i++) {
         if (native_integers)
            values[i].u = ir->value.u[i];
         else
            values[i].f = ir->value.u[i];
      }
      break;
   case GLSL_TYPE_INT:
      gl_type = native_integers ? GL_INT : GL_FLOAT;
      for (i = 0; i < ir->type->vector_elements; i++) {
         if (native_integers)
            values[i].i = ir->value.i[i];
         else
            values[i].f = ir->value.i[i];
      }
      break;
   case GLSL_TYPE_BOOL:
      gl_type = native_integers ? GL_BOOL : GL_FLOAT;
      for (i = 0; i < ir->type->vector_elements; i++) {
         values[i].u = ir->value.b[i] ? ctx->Const.UniformBooleanTrue : 0;
      }
      break;
   default:
      assert(!"Non-float/uint/int/bool constant");
   }

   this->result = st_src_reg(file, -1, ir->type);
   this->result.index = add_constant(file,
                                     values,
                                     ir->type->vector_elements,
                                     gl_type,
                                     &this->result.swizzle);
}
3195
/**
 * Lower an atomic-counter intrinsic call to a TGSI LOAD/ATOM* instruction
 * operating on the counter's backing buffer (PROGRAM_BUFFER at the
 * variable's binding point).
 */
void
glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
{
   /* First actual parameter is the dereference of the counter variable. */
   exec_node *param = ir->actual_parameters.get_head();
   ir_dereference *deref = static_cast<ir_dereference *>(param);
   ir_variable *location = deref->variable_referenced();

   st_src_reg buffer(
         PROGRAM_BUFFER, location->data.binding, GLSL_TYPE_ATOMIC_UINT);

   /* Calculate the surface offset */
   st_src_reg offset;
   unsigned array_size = 0, base = 0;
   uint16_t index = 0;

   get_deref_offsets(deref, &array_size, &base, &index, &offset, false);

   if (offset.file != PROGRAM_UNDEFINED) {
      /* Indirect index: scale by counter size, then add the variable's
       * byte offset plus the constant part of the index.
       */
      emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(offset),
               offset, st_src_reg_for_int(ATOMIC_COUNTER_SIZE));
      emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(offset),
               offset, st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE));
   } else {
      /* Fully constant offset. */
      offset = st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE);
   }

   /* All atomic-counter intrinsics return a value; write it to .x of the
    * destination deref.
    */
   ir->return_deref->accept(this);
   st_dst_reg dst(this->result);
   dst.writemask = WRITEMASK_X;

   glsl_to_tgsi_instruction *inst;

   if (ir->callee->intrinsic_id == ir_intrinsic_atomic_counter_read) {
      inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, offset);
   } else if (ir->callee->intrinsic_id == ir_intrinsic_atomic_counter_increment) {
      inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset,
                      st_src_reg_for_int(1));
   } else if (ir->callee->intrinsic_id == ir_intrinsic_atomic_counter_predecrement) {
      /* ATOMUADD returns the pre-op value; pre-decrement must yield the
       * post-op value, so subtract one from the result as well.
       */
      inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset,
                      st_src_reg_for_int(-1));
      emit_asm(ir, TGSI_OPCODE_ADD, dst, this->result, st_src_reg_for_int(-1));
   } else {
      /* Data-carrying atomics: second parameter is the operand, and
       * comp_swap additionally takes a third (the compare value).
       */
      param = param->get_next();
      ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
      val->accept(this);

      st_src_reg data = this->result, data2 = undef_src;
      unsigned opcode;
      switch (ir->callee->intrinsic_id) {
      case ir_intrinsic_atomic_counter_add:
         opcode = TGSI_OPCODE_ATOMUADD;
         break;
      case ir_intrinsic_atomic_counter_min:
         opcode = TGSI_OPCODE_ATOMIMIN;
         break;
      case ir_intrinsic_atomic_counter_max:
         opcode = TGSI_OPCODE_ATOMIMAX;
         break;
      case ir_intrinsic_atomic_counter_and:
         opcode = TGSI_OPCODE_ATOMAND;
         break;
      case ir_intrinsic_atomic_counter_or:
         opcode = TGSI_OPCODE_ATOMOR;
         break;
      case ir_intrinsic_atomic_counter_xor:
         opcode = TGSI_OPCODE_ATOMXOR;
         break;
      case ir_intrinsic_atomic_counter_exchange:
         opcode = TGSI_OPCODE_ATOMXCHG;
         break;
      case ir_intrinsic_atomic_counter_comp_swap: {
         opcode = TGSI_OPCODE_ATOMCAS;
         param = param->get_next();
         val = ((ir_instruction *)param)->as_rvalue();
         val->accept(this);
         data2 = this->result;
         break;
      }
      default:
         assert(!"Unexpected intrinsic");
         return;
      }

      inst = emit_asm(ir, opcode, dst, offset, data, data2);
   }

   /* Attach the counter buffer as the memory resource of the emitted op. */
   inst->resource = buffer;
}
3284
/**
 * Lower an SSBO load/store/atomic intrinsic call to TGSI buffer
 * instructions.  Parameter order (from the GLSL IR lowering) is:
 * block index, byte offset, then per-intrinsic operands, with an optional
 * trailing access-qualifier constant.
 */
void
glsl_to_tgsi_visitor::visit_ssbo_intrinsic(ir_call *ir)
{
   exec_node *param = ir->actual_parameters.get_head();

   ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();

   param = param->get_next();
   ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();

   ir_constant *const_block = block->as_constant();

   /* SSBO slots are allocated after the atomic-counter buffers, hence the
    * MaxAtomicBuffers base offset.
    */
   st_src_reg buffer(
         PROGRAM_BUFFER,
         ctx->Const.Program[shader->Stage].MaxAtomicBuffers +
         (const_block ? const_block->value.u[0] : 0),
         GLSL_TYPE_UINT);

   if (!const_block) {
      /* Non-constant block index: address the buffer indirectly through
       * the address register.
       */
      block->accept(this);
      buffer.reladdr = ralloc(mem_ctx, st_src_reg);
      *buffer.reladdr = this->result;
      emit_arl(ir, sampler_reladdr, this->result);
   }

   /* Calculate the surface offset */
   offset->accept(this);
   st_src_reg off = this->result;

   st_dst_reg dst = undef_dst;
   if (ir->return_deref) {
      ir->return_deref->accept(this);
      dst = st_dst_reg(this->result);
      dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1;
   }

   glsl_to_tgsi_instruction *inst;

   if (ir->callee->intrinsic_id == ir_intrinsic_ssbo_load) {
      inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off);
      /* Booleans are stored as integers; normalize any non-zero value
       * back to the canonical boolean representation.
       */
      if (dst.type == GLSL_TYPE_BOOL)
         emit_asm(ir, TGSI_OPCODE_USNE, dst, st_src_reg(dst), st_src_reg_for_int(0));
   } else if (ir->callee->intrinsic_id == ir_intrinsic_ssbo_store) {
      /* Store takes the value and an explicit component write mask. */
      param = param->get_next();
      ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
      val->accept(this);

      param = param->get_next();
      ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
      assert(write_mask);
      dst.writemask = write_mask->value.u[0];

      dst.type = this->result.type;
      inst = emit_asm(ir, TGSI_OPCODE_STORE, dst, off, this->result);
   } else {
      /* Atomics: one data operand, plus a compare operand for comp_swap. */
      param = param->get_next();
      ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
      val->accept(this);

      st_src_reg data = this->result, data2 = undef_src;
      unsigned opcode;
      switch (ir->callee->intrinsic_id) {
      case ir_intrinsic_ssbo_atomic_add:
         opcode = TGSI_OPCODE_ATOMUADD;
         break;
      case ir_intrinsic_ssbo_atomic_min:
         opcode = TGSI_OPCODE_ATOMIMIN;
         break;
      case ir_intrinsic_ssbo_atomic_max:
         opcode = TGSI_OPCODE_ATOMIMAX;
         break;
      case ir_intrinsic_ssbo_atomic_and:
         opcode = TGSI_OPCODE_ATOMAND;
         break;
      case ir_intrinsic_ssbo_atomic_or:
         opcode = TGSI_OPCODE_ATOMOR;
         break;
      case ir_intrinsic_ssbo_atomic_xor:
         opcode = TGSI_OPCODE_ATOMXOR;
         break;
      case ir_intrinsic_ssbo_atomic_exchange:
         opcode = TGSI_OPCODE_ATOMXCHG;
         break;
      case ir_intrinsic_ssbo_atomic_comp_swap:
         opcode = TGSI_OPCODE_ATOMCAS;
         param = param->get_next();
         val = ((ir_instruction *)param)->as_rvalue();
         val->accept(this);
         data2 = this->result;
         break;
      default:
         assert(!"Unexpected intrinsic");
         return;
      }

      inst = emit_asm(ir, opcode, dst, off, data, data2);
   }

   /* Optional trailing parameter carries the memory access qualifiers
    * (coherent/volatile/restrict) as a constant.
    */
   param = param->get_next();
   ir_constant *access = NULL;
   if (!param->is_tail_sentinel()) {
      access = ((ir_instruction *)param)->as_constant();
      assert(access);
   }

   /* The emit_asm() might have actually split the op into pieces, e.g. for
    * double stores. We have to go back and fix up all the generated ops.
    */
   unsigned op = inst->op;
   do {
      inst->resource = buffer;
      if (access)
         inst->buffer_access = access->value.u[0];
      inst = (glsl_to_tgsi_instruction *)inst->get_prev();
      /* Skip over an interleaved UADD — presumably the address adjustment
       * emitted between the split pieces; TODO confirm against emit_asm's
       * 64-bit splitting.
       */
      if (inst->op == TGSI_OPCODE_UADD)
         inst = (glsl_to_tgsi_instruction *)inst->get_prev();
   } while (inst && inst->op == op && inst->resource.file == PROGRAM_UNDEFINED);
}
3403
3404 void
3405 glsl_to_tgsi_visitor::visit_membar_intrinsic(ir_call *ir)
3406 {
3407 switch (ir->callee->intrinsic_id) {
3408 case ir_intrinsic_memory_barrier:
3409 emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
3410 st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER |
3411 TGSI_MEMBAR_ATOMIC_BUFFER |
3412 TGSI_MEMBAR_SHADER_IMAGE |
3413 TGSI_MEMBAR_SHARED));
3414 break;
3415 case ir_intrinsic_memory_barrier_atomic_counter:
3416 emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
3417 st_src_reg_for_int(TGSI_MEMBAR_ATOMIC_BUFFER));
3418 break;
3419 case ir_intrinsic_memory_barrier_buffer:
3420 emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
3421 st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER));
3422 break;
3423 case ir_intrinsic_memory_barrier_image:
3424 emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
3425 st_src_reg_for_int(TGSI_MEMBAR_SHADER_IMAGE));
3426 break;
3427 case ir_intrinsic_memory_barrier_shared:
3428 emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
3429 st_src_reg_for_int(TGSI_MEMBAR_SHARED));
3430 break;
3431 case ir_intrinsic_group_memory_barrier:
3432 emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
3433 st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER |
3434 TGSI_MEMBAR_ATOMIC_BUFFER |
3435 TGSI_MEMBAR_SHADER_IMAGE |
3436 TGSI_MEMBAR_SHARED |
3437 TGSI_MEMBAR_THREAD_GROUP));
3438 break;
3439 default:
3440 assert(!"Unexpected memory barrier intrinsic");
3441 }
3442 }
3443
/**
 * Lower a compute-shader shared-variable intrinsic (load/store/atomic) to
 * TGSI instructions on the PROGRAM_MEMORY resource.  Mirrors
 * visit_ssbo_intrinsic() but with a fixed resource and no access
 * qualifiers.
 */
void
glsl_to_tgsi_visitor::visit_shared_intrinsic(ir_call *ir)
{
   exec_node *param = ir->actual_parameters.get_head();

   /* First parameter is the byte offset into shared memory. */
   ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();

   st_src_reg buffer(PROGRAM_MEMORY, 0, GLSL_TYPE_UINT);

   /* Calculate the surface offset */
   offset->accept(this);
   st_src_reg off = this->result;

   st_dst_reg dst = undef_dst;
   if (ir->return_deref) {
      ir->return_deref->accept(this);
      dst = st_dst_reg(this->result);
      dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1;
   }

   glsl_to_tgsi_instruction *inst;

   if (ir->callee->intrinsic_id == ir_intrinsic_shared_load) {
      inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off);
      inst->resource = buffer;
   } else if (ir->callee->intrinsic_id == ir_intrinsic_shared_store) {
      /* Store takes the value and an explicit component write mask. */
      param = param->get_next();
      ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
      val->accept(this);

      param = param->get_next();
      ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
      assert(write_mask);
      dst.writemask = write_mask->value.u[0];

      dst.type = this->result.type;
      inst = emit_asm(ir, TGSI_OPCODE_STORE, dst, off, this->result);
      inst->resource = buffer;
   } else {
      /* Atomics: one data operand, plus a compare operand for comp_swap. */
      param = param->get_next();
      ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
      val->accept(this);

      st_src_reg data = this->result, data2 = undef_src;
      unsigned opcode;
      switch (ir->callee->intrinsic_id) {
      case ir_intrinsic_shared_atomic_add:
         opcode = TGSI_OPCODE_ATOMUADD;
         break;
      case ir_intrinsic_shared_atomic_min:
         opcode = TGSI_OPCODE_ATOMIMIN;
         break;
      case ir_intrinsic_shared_atomic_max:
         opcode = TGSI_OPCODE_ATOMIMAX;
         break;
      case ir_intrinsic_shared_atomic_and:
         opcode = TGSI_OPCODE_ATOMAND;
         break;
      case ir_intrinsic_shared_atomic_or:
         opcode = TGSI_OPCODE_ATOMOR;
         break;
      case ir_intrinsic_shared_atomic_xor:
         opcode = TGSI_OPCODE_ATOMXOR;
         break;
      case ir_intrinsic_shared_atomic_exchange:
         opcode = TGSI_OPCODE_ATOMXCHG;
         break;
      case ir_intrinsic_shared_atomic_comp_swap:
         opcode = TGSI_OPCODE_ATOMCAS;
         param = param->get_next();
         val = ((ir_instruction *)param)->as_rvalue();
         val->accept(this);
         data2 = this->result;
         break;
      default:
         assert(!"Unexpected intrinsic");
         return;
      }

      inst = emit_asm(ir, opcode, dst, off, data, data2);
      inst->resource = buffer;
   }
}
3527
/**
 * Lower an image load/store/atomic/size/samples intrinsic to TGSI.
 * The first actual parameter is the image dereference; the emitted
 * instruction gets its resource, texture target, format and access
 * qualifiers filled in from the image variable.
 */
void
glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir)
{
   exec_node *param = ir->actual_parameters.get_head();

   ir_dereference *img = (ir_dereference *)param;
   const ir_variable *imgvar = img->variable_referenced();
   const glsl_type *type = imgvar->type->without_array();
   unsigned sampler_array_size = 1, sampler_base = 0;

   st_src_reg reladdr;
   st_src_reg image(PROGRAM_IMAGE, 0, GLSL_TYPE_UINT);

   get_deref_offsets(img, &sampler_array_size, &sampler_base,
                     (uint16_t*)&image.index, &reladdr, true);

   if (reladdr.file != PROGRAM_UNDEFINED) {
      /* Indirectly indexed image array: address through the ARL register. */
      image.reladdr = ralloc(mem_ctx, st_src_reg);
      *image.reladdr = reladdr;
      emit_arl(ir, sampler_reladdr, reladdr);
   }

   st_dst_reg dst = undef_dst;
   if (ir->return_deref) {
      ir->return_deref->accept(this);
      dst = st_dst_reg(this->result);
      dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1;
   }

   glsl_to_tgsi_instruction *inst;

   if (ir->callee->intrinsic_id == ir_intrinsic_image_size) {
      dst.writemask = WRITEMASK_XYZ;
      inst = emit_asm(ir, TGSI_OPCODE_RESQ, dst);
   } else if (ir->callee->intrinsic_id == ir_intrinsic_image_samples) {
      /* RESQ returns the sample count in .w; copy it to the scalar dst. */
      st_src_reg res = get_temp(glsl_type::ivec4_type);
      st_dst_reg dstres = st_dst_reg(res);
      dstres.writemask = WRITEMASK_W;
      inst = emit_asm(ir, TGSI_OPCODE_RESQ, dstres);
      res.swizzle = SWIZZLE_WWWW;
      emit_asm(ir, TGSI_OPCODE_MOV, dst, res);
   } else {
      st_src_reg arg1 = undef_src, arg2 = undef_src;
      st_src_reg coord;
      st_dst_reg coord_dst;
      /* Stage the coordinate in an ivec4 temp, masked/swizzled to the
       * image's coordinate component count.
       */
      coord = get_temp(glsl_type::ivec4_type);
      coord_dst = st_dst_reg(coord);
      coord_dst.writemask = (1 << type->coordinate_components()) - 1;
      param = param->get_next();
      ((ir_dereference *)param)->accept(this);
      emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
      coord.swizzle = SWIZZLE_XXXX;
      /* Build up the swizzle one used component at a time. */
      switch (type->coordinate_components()) {
      case 4: assert(!"unexpected coord count");
      /* fallthrough */
      case 3: coord.swizzle |= SWIZZLE_Z << 6;
      /* fallthrough */
      case 2: coord.swizzle |= SWIZZLE_Y << 3;
      }

      if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) {
         /* Multisample images take an extra sample index, placed in .w. */
         param = param->get_next();
         ((ir_dereference *)param)->accept(this);
         st_src_reg sample = this->result;
         sample.swizzle = SWIZZLE_XXXX;
         coord_dst.writemask = WRITEMASK_W;
         emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, sample);
         coord.swizzle |= SWIZZLE_W << 9;
      }

      /* Up to two further operands: store value, or atomic data (and
       * compare value for comp_swap).
       */
      param = param->get_next();
      if (!param->is_tail_sentinel()) {
         ((ir_dereference *)param)->accept(this);
         arg1 = this->result;
         param = param->get_next();
      }

      if (!param->is_tail_sentinel()) {
         ((ir_dereference *)param)->accept(this);
         arg2 = this->result;
         param = param->get_next();
      }

      assert(param->is_tail_sentinel());

      unsigned opcode;
      switch (ir->callee->intrinsic_id) {
      case ir_intrinsic_image_load:
         opcode = TGSI_OPCODE_LOAD;
         break;
      case ir_intrinsic_image_store:
         opcode = TGSI_OPCODE_STORE;
         break;
      case ir_intrinsic_image_atomic_add:
         opcode = TGSI_OPCODE_ATOMUADD;
         break;
      case ir_intrinsic_image_atomic_min:
         opcode = TGSI_OPCODE_ATOMIMIN;
         break;
      case ir_intrinsic_image_atomic_max:
         opcode = TGSI_OPCODE_ATOMIMAX;
         break;
      case ir_intrinsic_image_atomic_and:
         opcode = TGSI_OPCODE_ATOMAND;
         break;
      case ir_intrinsic_image_atomic_or:
         opcode = TGSI_OPCODE_ATOMOR;
         break;
      case ir_intrinsic_image_atomic_xor:
         opcode = TGSI_OPCODE_ATOMXOR;
         break;
      case ir_intrinsic_image_atomic_exchange:
         opcode = TGSI_OPCODE_ATOMXCHG;
         break;
      case ir_intrinsic_image_atomic_comp_swap:
         opcode = TGSI_OPCODE_ATOMCAS;
         break;
      default:
         assert(!"Unexpected intrinsic");
         return;
      }

      inst = emit_asm(ir, opcode, dst, coord, arg1, arg2);
      /* Stores have no destination register; the writemask here controls
       * which components of the store data are written to the image.
       */
      if (opcode == TGSI_OPCODE_STORE)
         inst->dst[0].writemask = WRITEMASK_XYZW;
   }

   inst->resource = image;
   inst->sampler_array_size = sampler_array_size;
   inst->sampler_base = sampler_base;

   /* Translate the GLSL sampler dimensionality into a Mesa texture target. */
   switch (type->sampler_dimensionality) {
   case GLSL_SAMPLER_DIM_1D:
      inst->tex_target = (type->sampler_array)
         ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
      break;
   case GLSL_SAMPLER_DIM_2D:
      inst->tex_target = (type->sampler_array)
         ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
      break;
   case GLSL_SAMPLER_DIM_3D:
      inst->tex_target = TEXTURE_3D_INDEX;
      break;
   case GLSL_SAMPLER_DIM_CUBE:
      inst->tex_target = (type->sampler_array)
         ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX;
      break;
   case GLSL_SAMPLER_DIM_RECT:
      inst->tex_target = TEXTURE_RECT_INDEX;
      break;
   case GLSL_SAMPLER_DIM_BUF:
      inst->tex_target = TEXTURE_BUFFER_INDEX;
      break;
   case GLSL_SAMPLER_DIM_EXTERNAL:
      inst->tex_target = TEXTURE_EXTERNAL_INDEX;
      break;
   case GLSL_SAMPLER_DIM_MS:
      inst->tex_target = (type->sampler_array)
         ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX;
      break;
   default:
      assert(!"Should not get here.");
   }

   inst->image_format = st_mesa_format_to_pipe_format(st_context(ctx),
         _mesa_get_shader_image_format(imgvar->data.image_format));

   /* Propagate the image variable's memory qualifiers onto the op. */
   if (imgvar->data.image_coherent)
      inst->buffer_access |= TGSI_MEMORY_COHERENT;
   if (imgvar->data.image_restrict)
      inst->buffer_access |= TGSI_MEMORY_RESTRICT;
   if (imgvar->data.image_volatile)
      inst->buffer_access |= TGSI_MEMORY_VOLATILE;
}
3702
/**
 * Visit an ir_call.  At this point in compilation every remaining call must
 * be a known intrinsic (user functions have been inlined); dispatch it to
 * the matching intrinsic handler.  The switch intentionally has no default
 * so the compiler warns if a new intrinsic enum value is left unhandled.
 */
void
glsl_to_tgsi_visitor::visit(ir_call *ir)
{
   ir_function_signature *sig = ir->callee;

   /* Filter out intrinsics */
   switch (sig->intrinsic_id) {
   case ir_intrinsic_atomic_counter_read:
   case ir_intrinsic_atomic_counter_increment:
   case ir_intrinsic_atomic_counter_predecrement:
   case ir_intrinsic_atomic_counter_add:
   case ir_intrinsic_atomic_counter_min:
   case ir_intrinsic_atomic_counter_max:
   case ir_intrinsic_atomic_counter_and:
   case ir_intrinsic_atomic_counter_or:
   case ir_intrinsic_atomic_counter_xor:
   case ir_intrinsic_atomic_counter_exchange:
   case ir_intrinsic_atomic_counter_comp_swap:
      visit_atomic_counter_intrinsic(ir);
      return;

   case ir_intrinsic_ssbo_load:
   case ir_intrinsic_ssbo_store:
   case ir_intrinsic_ssbo_atomic_add:
   case ir_intrinsic_ssbo_atomic_min:
   case ir_intrinsic_ssbo_atomic_max:
   case ir_intrinsic_ssbo_atomic_and:
   case ir_intrinsic_ssbo_atomic_or:
   case ir_intrinsic_ssbo_atomic_xor:
   case ir_intrinsic_ssbo_atomic_exchange:
   case ir_intrinsic_ssbo_atomic_comp_swap:
      visit_ssbo_intrinsic(ir);
      return;

   case ir_intrinsic_memory_barrier:
   case ir_intrinsic_memory_barrier_atomic_counter:
   case ir_intrinsic_memory_barrier_buffer:
   case ir_intrinsic_memory_barrier_image:
   case ir_intrinsic_memory_barrier_shared:
   case ir_intrinsic_group_memory_barrier:
      visit_membar_intrinsic(ir);
      return;

   case ir_intrinsic_shared_load:
   case ir_intrinsic_shared_store:
   case ir_intrinsic_shared_atomic_add:
   case ir_intrinsic_shared_atomic_min:
   case ir_intrinsic_shared_atomic_max:
   case ir_intrinsic_shared_atomic_and:
   case ir_intrinsic_shared_atomic_or:
   case ir_intrinsic_shared_atomic_xor:
   case ir_intrinsic_shared_atomic_exchange:
   case ir_intrinsic_shared_atomic_comp_swap:
      visit_shared_intrinsic(ir);
      return;

   case ir_intrinsic_image_load:
   case ir_intrinsic_image_store:
   case ir_intrinsic_image_atomic_add:
   case ir_intrinsic_image_atomic_min:
   case ir_intrinsic_image_atomic_max:
   case ir_intrinsic_image_atomic_and:
   case ir_intrinsic_image_atomic_or:
   case ir_intrinsic_image_atomic_xor:
   case ir_intrinsic_image_atomic_exchange:
   case ir_intrinsic_image_atomic_comp_swap:
   case ir_intrinsic_image_size:
   case ir_intrinsic_image_samples:
      visit_image_intrinsic(ir);
      return;

   /* None of these should survive to this backend. */
   case ir_intrinsic_invalid:
   case ir_intrinsic_generic_load:
   case ir_intrinsic_generic_store:
   case ir_intrinsic_generic_atomic_add:
   case ir_intrinsic_generic_atomic_and:
   case ir_intrinsic_generic_atomic_or:
   case ir_intrinsic_generic_atomic_xor:
   case ir_intrinsic_generic_atomic_min:
   case ir_intrinsic_generic_atomic_max:
   case ir_intrinsic_generic_atomic_exchange:
   case ir_intrinsic_generic_atomic_comp_swap:
   case ir_intrinsic_shader_clock:
      unreachable("Invalid intrinsic");
   }
}
3789
/**
 * Recursively walk a dereference chain (innermost first) and accumulate its
 * addressing information.
 *
 * \param tail            current link of the deref chain being processed
 * \param array_elements  in/out: product of array lengths seen so far
 *                        (stride multiplier for outer indices)
 * \param index           in/out: accumulated constant part of the index
 * \param indirect        in/out: register holding the accumulated
 *                        non-constant part (PROGRAM_UNDEFINED if none)
 * \param location        in/out: accumulated uniform-storage location
 *                        offset (advanced for record fields)
 */
void
glsl_to_tgsi_visitor::calc_deref_offsets(ir_dereference *tail,
                                         unsigned *array_elements,
                                         uint16_t *index,
                                         st_src_reg *indirect,
                                         unsigned *location)
{
   switch (tail->ir_type) {
   case ir_type_dereference_record: {
      ir_dereference_record *deref_record = tail->as_dereference_record();
      const glsl_type *struct_type = deref_record->record->type;
      int field_index = deref_record->record->type->field_index(deref_record->field);

      /* Resolve the record itself first, then add this field's offset. */
      calc_deref_offsets(deref_record->record->as_dereference(), array_elements, index, indirect, location);

      assert(field_index >= 0);
      *location += struct_type->record_location_offset(field_index);
      break;
   }

   case ir_type_dereference_array: {
      ir_dereference_array *deref_arr = tail->as_dereference_array();
      ir_constant *array_index = deref_arr->array_index->constant_expression_value();

      if (!array_index) {
         /* Dynamic index: evaluate it, scale by the running element count,
          * and fold it into the indirect register (creating it on first
          * use, adding on subsequent ones).
          */
         st_src_reg temp_reg;
         st_dst_reg temp_dst;

         temp_reg = get_temp(glsl_type::uint_type);
         temp_dst = st_dst_reg(temp_reg);
         temp_dst.writemask = 1;

         deref_arr->array_index->accept(this);
         if (*array_elements != 1)
            emit_asm(NULL, TGSI_OPCODE_MUL, temp_dst, this->result, st_src_reg_for_int(*array_elements));
         else
            emit_asm(NULL, TGSI_OPCODE_MOV, temp_dst, this->result);

         if (indirect->file == PROGRAM_UNDEFINED)
            *indirect = temp_reg;
         else {
            temp_dst = st_dst_reg(*indirect);
            temp_dst.writemask = 1;
            emit_asm(NULL, TGSI_OPCODE_ADD, temp_dst, *indirect, temp_reg);
         }
      } else
         /* Constant index: fold it straight into the constant part. */
         *index += array_index->value.u[0] * *array_elements;

      /* Outer dimensions stride over everything seen so far. */
      *array_elements *= deref_arr->array->type->length;

      calc_deref_offsets(deref_arr->array->as_dereference(), array_elements, index, indirect, location);
      break;
   }
   default:
      /* ir_dereference_variable terminates the chain; nothing to add. */
      break;
   }
}
3847
/**
 * Compute the addressing of a (possibly indexed) dereference.
 *
 * \param ir          the dereference to resolve
 * \param array_size  out: number of array elements covered when indirectly
 *                    addressed (1 when the address is fully constant)
 * \param base        out: constant base index (equals *index when there is
 *                    no indirect component)
 * \param index       out: accumulated constant index
 * \param reladdr     out: indirect-address register, or PROGRAM_UNDEFINED
 * \param opaque      when true, offset base/index by the opaque-uniform
 *                    (sampler/image) index recorded in UniformStorage for
 *                    this shader stage
 */
void
glsl_to_tgsi_visitor::get_deref_offsets(ir_dereference *ir,
                                        unsigned *array_size,
                                        unsigned *base,
                                        uint16_t *index,
                                        st_src_reg *reladdr,
                                        bool opaque)
{
   GLuint shader = _mesa_program_enum_to_shader_stage(this->prog->Target);
   unsigned location = 0;
   ir_variable *var = ir->variable_referenced();

   /* Start with "no indirect addressing". */
   memset(reladdr, 0, sizeof(*reladdr));
   reladdr->file = PROGRAM_UNDEFINED;

   *base = 0;
   *array_size = 1;

   assert(var);
   location = var->data.location;
   calc_deref_offsets(ir, array_size, index, reladdr, &location);

   /*
    * If we end up with no indirect then adjust the base to the index,
    * and set the array size to 1.
    */
   if (reladdr->file == PROGRAM_UNDEFINED) {
      *base = *index;
      *array_size = 1;
   }

   if (opaque) {
      assert(location != 0xffffffff);
      *base += this->shader_program->UniformStorage[location].opaque[shader].index;
      *index += this->shader_program->UniformStorage[location].opaque[shader].index;
   }
}
3885
3886 st_src_reg
3887 glsl_to_tgsi_visitor::canonicalize_gather_offset(st_src_reg offset)
3888 {
3889 if (offset.reladdr || offset.reladdr2) {
3890 st_src_reg tmp = get_temp(glsl_type::ivec2_type);
3891 st_dst_reg tmp_dst = st_dst_reg(tmp);
3892 tmp_dst.writemask = WRITEMASK_XY;
3893 emit_asm(NULL, TGSI_OPCODE_MOV, tmp_dst, offset);
3894 return tmp;
3895 }
3896
3897 return offset;
3898 }
3899
3900 void
3901 glsl_to_tgsi_visitor::visit(ir_texture *ir)
3902 {
3903 st_src_reg result_src, coord, cube_sc, lod_info, projector, dx, dy;
3904 st_src_reg offset[MAX_GLSL_TEXTURE_OFFSET], sample_index, component;
3905 st_src_reg levels_src, reladdr;
3906 st_dst_reg result_dst, coord_dst, cube_sc_dst;
3907 glsl_to_tgsi_instruction *inst = NULL;
3908 unsigned opcode = TGSI_OPCODE_NOP;
3909 const glsl_type *sampler_type = ir->sampler->type;
3910 unsigned sampler_array_size = 1, sampler_base = 0;
3911 uint16_t sampler_index = 0;
3912 bool is_cube_array = false;
3913 unsigned i;
3914
3915 /* if we are a cube array sampler */
3916 if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
3917 sampler_type->sampler_array)) {
3918 is_cube_array = true;
3919 }
3920
3921 if (ir->coordinate) {
3922 ir->coordinate->accept(this);
3923
3924 /* Put our coords in a temp. We'll need to modify them for shadow,
3925 * projection, or LOD, so the only case we'd use it as-is is if
3926 * we're doing plain old texturing. The optimization passes on
3927 * glsl_to_tgsi_visitor should handle cleaning up our mess in that case.
3928 */
3929 coord = get_temp(glsl_type::vec4_type);
3930 coord_dst = st_dst_reg(coord);
3931 coord_dst.writemask = (1 << ir->coordinate->type->vector_elements) - 1;
3932 emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
3933 }
3934
3935 if (ir->projector) {
3936 ir->projector->accept(this);
3937 projector = this->result;
3938 }
3939
3940 /* Storage for our result. Ideally for an assignment we'd be using
3941 * the actual storage for the result here, instead.
3942 */
3943 result_src = get_temp(ir->type);
3944 result_dst = st_dst_reg(result_src);
3945
3946 switch (ir->op) {
3947 case ir_tex:
3948 opcode = (is_cube_array && ir->shadow_comparitor) ? TGSI_OPCODE_TEX2 : TGSI_OPCODE_TEX;
3949 if (ir->offset) {
3950 ir->offset->accept(this);
3951 offset[0] = this->result;
3952 }
3953 break;
3954 case ir_txb:
3955 if (is_cube_array ||
3956 sampler_type == glsl_type::samplerCubeShadow_type) {
3957 opcode = TGSI_OPCODE_TXB2;
3958 }
3959 else {
3960 opcode = TGSI_OPCODE_TXB;
3961 }
3962 ir->lod_info.bias->accept(this);
3963 lod_info = this->result;
3964 if (ir->offset) {
3965 ir->offset->accept(this);
3966 offset[0] = this->result;
3967 }
3968 break;
3969 case ir_txl:
3970 opcode = is_cube_array ? TGSI_OPCODE_TXL2 : TGSI_OPCODE_TXL;
3971 ir->lod_info.lod->accept(this);
3972 lod_info = this->result;
3973 if (ir->offset) {
3974 ir->offset->accept(this);
3975 offset[0] = this->result;
3976 }
3977 break;
3978 case ir_txd:
3979 opcode = TGSI_OPCODE_TXD;
3980 ir->lod_info.grad.dPdx->accept(this);
3981 dx = this->result;
3982 ir->lod_info.grad.dPdy->accept(this);
3983 dy = this->result;
3984 if (ir->offset) {
3985 ir->offset->accept(this);
3986 offset[0] = this->result;
3987 }
3988 break;
3989 case ir_txs:
3990 opcode = TGSI_OPCODE_TXQ;
3991 ir->lod_info.lod->accept(this);
3992 lod_info = this->result;
3993 break;
3994 case ir_query_levels:
3995 opcode = TGSI_OPCODE_TXQ;
3996 lod_info = undef_src;
3997 levels_src = get_temp(ir->type);
3998 break;
3999 case ir_txf:
4000 opcode = TGSI_OPCODE_TXF;
4001 ir->lod_info.lod->accept(this);
4002 lod_info = this->result;
4003 if (ir->offset) {
4004 ir->offset->accept(this);
4005 offset[0] = this->result;
4006 }
4007 break;
4008 case ir_txf_ms:
4009 opcode = TGSI_OPCODE_TXF;
4010 ir->lod_info.sample_index->accept(this);
4011 sample_index = this->result;
4012 break;
4013 case ir_tg4:
4014 opcode = TGSI_OPCODE_TG4;
4015 ir->lod_info.component->accept(this);
4016 component = this->result;
4017 if (ir->offset) {
4018 ir->offset->accept(this);
4019 if (ir->offset->type->base_type == GLSL_TYPE_ARRAY) {
4020 const glsl_type *elt_type = ir->offset->type->fields.array;
4021 for (i = 0; i < ir->offset->type->length; i++) {
4022 offset[i] = this->result;
4023 offset[i].index += i * type_size(elt_type);
4024 offset[i].type = elt_type->base_type;
4025 offset[i].swizzle = swizzle_for_size(elt_type->vector_elements);
4026 offset[i] = canonicalize_gather_offset(offset[i]);
4027 }
4028 } else {
4029 offset[0] = canonicalize_gather_offset(this->result);
4030 }
4031 }
4032 break;
4033 case ir_lod:
4034 opcode = TGSI_OPCODE_LODQ;
4035 break;
4036 case ir_texture_samples:
4037 opcode = TGSI_OPCODE_TXQS;
4038 break;
4039 case ir_samples_identical:
4040 unreachable("Unexpected ir_samples_identical opcode");
4041 }
4042
4043 if (ir->projector) {
4044 if (opcode == TGSI_OPCODE_TEX) {
4045 /* Slot the projector in as the last component of the coord. */
4046 coord_dst.writemask = WRITEMASK_W;
4047 emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, projector);
4048 coord_dst.writemask = WRITEMASK_XYZW;
4049 opcode = TGSI_OPCODE_TXP;
4050 } else {
4051 st_src_reg coord_w = coord;
4052 coord_w.swizzle = SWIZZLE_WWWW;
4053
4054 /* For the other TEX opcodes there's no projective version
4055 * since the last slot is taken up by LOD info. Do the
4056 * projective divide now.
4057 */
4058 coord_dst.writemask = WRITEMASK_W;
4059 emit_asm(ir, TGSI_OPCODE_RCP, coord_dst, projector);
4060
4061 /* In the case where we have to project the coordinates "by hand,"
4062 * the shadow comparator value must also be projected.
4063 */
4064 st_src_reg tmp_src = coord;
4065 if (ir->shadow_comparitor) {
4066 /* Slot the shadow value in as the second to last component of the
4067 * coord.
4068 */
4069 ir->shadow_comparitor->accept(this);
4070
4071 tmp_src = get_temp(glsl_type::vec4_type);
4072 st_dst_reg tmp_dst = st_dst_reg(tmp_src);
4073
4074 /* Projective division not allowed for array samplers. */
4075 assert(!sampler_type->sampler_array);
4076
4077 tmp_dst.writemask = WRITEMASK_Z;
4078 emit_asm(ir, TGSI_OPCODE_MOV, tmp_dst, this->result);
4079
4080 tmp_dst.writemask = WRITEMASK_XY;
4081 emit_asm(ir, TGSI_OPCODE_MOV, tmp_dst, coord);
4082 }
4083
4084 coord_dst.writemask = WRITEMASK_XYZ;
4085 emit_asm(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w);
4086
4087 coord_dst.writemask = WRITEMASK_XYZW;
4088 coord.swizzle = SWIZZLE_XYZW;
4089 }
4090 }
4091
4092 /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow
4093 * comparator was put in the correct place (and projected) by the code,
4094 * above, that handles by-hand projection.
4095 */
4096 if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) {
4097 /* Slot the shadow value in as the second to last component of the
4098 * coord.
4099 */
4100 ir->shadow_comparitor->accept(this);
4101
4102 if (is_cube_array) {
4103 cube_sc = get_temp(glsl_type::float_type);
4104 cube_sc_dst = st_dst_reg(cube_sc);
4105 cube_sc_dst.writemask = WRITEMASK_X;
4106 emit_asm(ir, TGSI_OPCODE_MOV, cube_sc_dst, this->result);
4107 cube_sc_dst.writemask = WRITEMASK_X;
4108 }
4109 else {
4110 if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D &&
4111 sampler_type->sampler_array) ||
4112 sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) {
4113 coord_dst.writemask = WRITEMASK_W;
4114 } else {
4115 coord_dst.writemask = WRITEMASK_Z;
4116 }
4117 emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
4118 coord_dst.writemask = WRITEMASK_XYZW;
4119 }
4120 }
4121
4122 if (ir->op == ir_txf_ms) {
4123 coord_dst.writemask = WRITEMASK_W;
4124 emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, sample_index);
4125 coord_dst.writemask = WRITEMASK_XYZW;
4126 } else if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB ||
4127 opcode == TGSI_OPCODE_TXF) {
4128 /* TGSI stores LOD or LOD bias in the last channel of the coords. */
4129 coord_dst.writemask = WRITEMASK_W;
4130 emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, lod_info);
4131 coord_dst.writemask = WRITEMASK_XYZW;
4132 }
4133
4134 get_deref_offsets(ir->sampler, &sampler_array_size, &sampler_base,
4135 &sampler_index, &reladdr, true);
4136 if (reladdr.file != PROGRAM_UNDEFINED)
4137 emit_arl(ir, sampler_reladdr, reladdr);
4138
4139 if (opcode == TGSI_OPCODE_TXD)
4140 inst = emit_asm(ir, opcode, result_dst, coord, dx, dy);
4141 else if (opcode == TGSI_OPCODE_TXQ) {
4142 if (ir->op == ir_query_levels) {
4143 /* the level is stored in W */
4144 inst = emit_asm(ir, opcode, st_dst_reg(levels_src), lod_info);
4145 result_dst.writemask = WRITEMASK_X;
4146 levels_src.swizzle = SWIZZLE_WWWW;
4147 emit_asm(ir, TGSI_OPCODE_MOV, result_dst, levels_src);
4148 } else
4149 inst = emit_asm(ir, opcode, result_dst, lod_info);
4150 } else if (opcode == TGSI_OPCODE_TXQS) {
4151 inst = emit_asm(ir, opcode, result_dst);
4152 } else if (opcode == TGSI_OPCODE_TXF) {
4153 inst = emit_asm(ir, opcode, result_dst, coord);
4154 } else if (opcode == TGSI_OPCODE_TXL2 || opcode == TGSI_OPCODE_TXB2) {
4155 inst = emit_asm(ir, opcode, result_dst, coord, lod_info);
4156 } else if (opcode == TGSI_OPCODE_TEX2) {
4157 inst = emit_asm(ir, opcode, result_dst, coord, cube_sc);
4158 } else if (opcode == TGSI_OPCODE_TG4) {
4159 if (is_cube_array && ir->shadow_comparitor) {
4160 inst = emit_asm(ir, opcode, result_dst, coord, cube_sc);
4161 } else {
4162 inst = emit_asm(ir, opcode, result_dst, coord, component);
4163 }
4164 } else
4165 inst = emit_asm(ir, opcode, result_dst, coord);
4166
4167 if (ir->shadow_comparitor)
4168 inst->tex_shadow = GL_TRUE;
4169
4170 inst->resource.index = sampler_index;
4171 inst->sampler_array_size = sampler_array_size;
4172 inst->sampler_base = sampler_base;
4173
4174 if (reladdr.file != PROGRAM_UNDEFINED) {
4175 inst->resource.reladdr = ralloc(mem_ctx, st_src_reg);
4176 memcpy(inst->resource.reladdr, &reladdr, sizeof(reladdr));
4177 }
4178
4179 if (ir->offset) {
4180 if (!inst->tex_offsets)
4181 inst->tex_offsets = rzalloc_array(inst, st_src_reg, MAX_GLSL_TEXTURE_OFFSET);
4182
4183 for (i = 0; i < MAX_GLSL_TEXTURE_OFFSET && offset[i].file != PROGRAM_UNDEFINED; i++)
4184 inst->tex_offsets[i] = offset[i];
4185 inst->tex_offset_num_offset = i;
4186 }
4187
4188 switch (sampler_type->sampler_dimensionality) {
4189 case GLSL_SAMPLER_DIM_1D:
4190 inst->tex_target = (sampler_type->sampler_array)
4191 ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
4192 break;
4193 case GLSL_SAMPLER_DIM_2D:
4194 inst->tex_target = (sampler_type->sampler_array)
4195 ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
4196 break;
4197 case GLSL_SAMPLER_DIM_3D:
4198 inst->tex_target = TEXTURE_3D_INDEX;
4199 break;
4200 case GLSL_SAMPLER_DIM_CUBE:
4201 inst->tex_target = (sampler_type->sampler_array)
4202 ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX;
4203 break;
4204 case GLSL_SAMPLER_DIM_RECT:
4205 inst->tex_target = TEXTURE_RECT_INDEX;
4206 break;
4207 case GLSL_SAMPLER_DIM_BUF:
4208 inst->tex_target = TEXTURE_BUFFER_INDEX;
4209 break;
4210 case GLSL_SAMPLER_DIM_EXTERNAL:
4211 inst->tex_target = TEXTURE_EXTERNAL_INDEX;
4212 break;
4213 case GLSL_SAMPLER_DIM_MS:
4214 inst->tex_target = (sampler_type->sampler_array)
4215 ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX;
4216 break;
4217 default:
4218 assert(!"Should not get here.");
4219 }
4220
4221 inst->tex_type = ir->type->base_type;
4222
4223 this->result = result_src;
4224 }
4225
4226 void
4227 glsl_to_tgsi_visitor::visit(ir_return *ir)
4228 {
4229 assert(!ir->get_value());
4230
4231 emit_asm(ir, TGSI_OPCODE_RET);
4232 }
4233
4234 void
4235 glsl_to_tgsi_visitor::visit(ir_discard *ir)
4236 {
4237 if (ir->condition) {
4238 ir->condition->accept(this);
4239 st_src_reg condition = this->result;
4240
4241 /* Convert the bool condition to a float so we can negate. */
4242 if (native_integers) {
4243 st_src_reg temp = get_temp(ir->condition->type);
4244 emit_asm(ir, TGSI_OPCODE_AND, st_dst_reg(temp),
4245 condition, st_src_reg_for_float(1.0));
4246 condition = temp;
4247 }
4248
4249 condition.negate = ~condition.negate;
4250 emit_asm(ir, TGSI_OPCODE_KILL_IF, undef_dst, condition);
4251 } else {
4252 /* unconditional kil */
4253 emit_asm(ir, TGSI_OPCODE_KILL);
4254 }
4255 }
4256
4257 void
4258 glsl_to_tgsi_visitor::visit(ir_if *ir)
4259 {
4260 unsigned if_opcode;
4261 glsl_to_tgsi_instruction *if_inst;
4262
4263 ir->condition->accept(this);
4264 assert(this->result.file != PROGRAM_UNDEFINED);
4265
4266 if_opcode = native_integers ? TGSI_OPCODE_UIF : TGSI_OPCODE_IF;
4267
4268 if_inst = emit_asm(ir->condition, if_opcode, undef_dst, this->result);
4269
4270 this->instructions.push_tail(if_inst);
4271
4272 visit_exec_list(&ir->then_instructions, this);
4273
4274 if (!ir->else_instructions.is_empty()) {
4275 emit_asm(ir->condition, TGSI_OPCODE_ELSE);
4276 visit_exec_list(&ir->else_instructions, this);
4277 }
4278
4279 if_inst = emit_asm(ir->condition, TGSI_OPCODE_ENDIF);
4280 }
4281
4282
4283 void
4284 glsl_to_tgsi_visitor::visit(ir_emit_vertex *ir)
4285 {
4286 assert(this->prog->Target == GL_GEOMETRY_PROGRAM_NV);
4287
4288 ir->stream->accept(this);
4289 emit_asm(ir, TGSI_OPCODE_EMIT, undef_dst, this->result);
4290 }
4291
4292 void
4293 glsl_to_tgsi_visitor::visit(ir_end_primitive *ir)
4294 {
4295 assert(this->prog->Target == GL_GEOMETRY_PROGRAM_NV);
4296
4297 ir->stream->accept(this);
4298 emit_asm(ir, TGSI_OPCODE_ENDPRIM, undef_dst, this->result);
4299 }
4300
4301 void
4302 glsl_to_tgsi_visitor::visit(ir_barrier *ir)
4303 {
4304 assert(this->prog->Target == GL_TESS_CONTROL_PROGRAM_NV ||
4305 this->prog->Target == GL_COMPUTE_PROGRAM_NV);
4306
4307 emit_asm(ir, TGSI_OPCODE_BARRIER);
4308 }
4309
/* Default-construct the visitor with all state cleared; the caller wires
 * up ctx/prog/shader/options before translation starts. */
glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
{
   /* samplers_used is a per-unit bitmask; it must be wide enough. */
   STATIC_ASSERT(sizeof(samplers_used) * 8 >= PIPE_MAX_SAMPLERS);

   result.file = PROGRAM_UNDEFINED;
   /* Temporary register numbering starts at 1. */
   next_temp = 1;
   /* Register-array bookkeeping (array_sizes is grown via realloc). */
   array_sizes = NULL;
   max_num_arrays = 0;
   next_array = 0;
   /* Shader interface counts, filled in during translation. */
   num_inputs = 0;
   num_outputs = 0;
   num_input_arrays = 0;
   num_output_arrays = 0;
   num_immediates = 0;
   num_address_regs = 0;
   /* Resource-usage bitmasks, recomputed by count_resources(). */
   samplers_used = 0;
   buffers_used = 0;
   images_used = 0;
   indirect_addr_consts = false;
   wpos_transform_const = -1; /* -1 = not assigned yet */
   glsl_version = 0;
   native_integers = false;
   /* Most per-visitor allocations hang off this ralloc context. */
   mem_ctx = ralloc_context(NULL);
   ctx = NULL;
   prog = NULL;
   shader_program = NULL;
   shader = NULL;
   options = NULL;
   have_sqrt = false;
   have_fma = false;
   use_shared_memory = false;
}
4342
4343 glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
4344 {
4345 free(array_sizes);
4346 ralloc_free(mem_ctx);
4347 }
4348
/* C-callable wrapper so non-C++ callers can destroy a visitor. */
extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v)
{
   delete v;
}
4353
4354
/**
 * Count resources used by the given gpu program (number of texture
 * samplers, etc).
 *
 * Rebuilds the visitor's samplers/buffers/images usage bitmasks from the
 * emitted instruction stream and mirrors the sampler mask into
 * prog->SamplersUsed.
 */
static void
count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
{
   v->samplers_used = 0;
   v->buffers_used = 0;
   v->images_used = 0;

   foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) {
      if (inst->info->is_tex) {
         /* A texture op may address a whole sampler array; mark every
          * element it can reach. */
         for (int i = 0; i < inst->sampler_array_size; i++) {
            unsigned idx = inst->sampler_base + i;
            v->samplers_used |= 1u << idx;

            debug_assert(idx < (int)ARRAY_SIZE(v->sampler_types));
            v->sampler_types[idx] = inst->tex_type;
            v->sampler_targets[idx] =
               st_translate_texture_target(inst->tex_target, inst->tex_shadow);

            if (inst->tex_shadow) {
               /* NOTE(review): this uses resource.index + i while the
                * masks above use sampler_base + i — presumably the two
                * agree for sampler instructions; confirm. */
               prog->ShadowSamplers |= 1 << (inst->resource.index + i);
            }
         }
      }

      if (inst->tex_target == TEXTURE_EXTERNAL_INDEX)
         prog->ExternalSamplersUsed |= 1 << inst->resource.index;

      /* Track buffer/shared-memory/image usage for resource access ops
       * (loads/atomics) and stores. */
      if (inst->resource.file != PROGRAM_UNDEFINED && (
                is_resource_instruction(inst->op) ||
                inst->op == TGSI_OPCODE_STORE)) {
         if (inst->resource.file == PROGRAM_BUFFER) {
            v->buffers_used |= 1 << inst->resource.index;
         } else if (inst->resource.file == PROGRAM_MEMORY) {
            v->use_shared_memory = true;
         } else {
            assert(inst->resource.file == PROGRAM_IMAGE);
            for (int i = 0; i < inst->sampler_array_size; i++) {
               unsigned idx = inst->sampler_base + i;
               v->images_used |= 1 << idx;
               v->image_targets[idx] =
                  st_translate_texture_target(inst->tex_target, false);
               v->image_formats[idx] = inst->image_format;
            }
         }
      }
   }
   prog->SamplersUsed = v->samplers_used;

   if (v->shader_program != NULL)
      _mesa_update_shader_textures_used(v->shader_program, prog);
}
4410
4411 /**
4412 * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
4413 * are read from the given src in this instruction
4414 */
4415 static int
4416 get_src_arg_mask(st_dst_reg dst, st_src_reg src)
4417 {
4418 int read_mask = 0, comp;
4419
4420 /* Now, given the src swizzle and the written channels, find which
4421 * components are actually read
4422 */
4423 for (comp = 0; comp < 4; ++comp) {
4424 const unsigned coord = GET_SWZ(src.swizzle, comp);
4425 assert(coord < 4);
4426 if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W)
4427 read_mask |= 1 << coord;
4428 }
4429
4430 return read_mask;
4431 }
4432
4433 /**
4434 * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
4435 * instruction is the first instruction to write to register T0. There are
4436 * several lowering passes done in GLSL IR (e.g. branches and
4437 * relative addressing) that create a large number of conditional assignments
4438 * that ir_to_mesa converts to CMP instructions like the one mentioned above.
4439 *
4440 * Here is why this conversion is safe:
4441 * CMP T0, T1 T2 T0 can be expanded to:
4442 * if (T1 < 0.0)
4443 * MOV T0, T2;
4444 * else
4445 * MOV T0, T0;
4446 *
4447 * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
4448 * as the original program. If (T1 < 0.0) evaluates to false, executing
4449 * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
4450 * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
4451 * because any instruction that was going to read from T0 after this was going
4452 * to read a garbage value anyway.
4453 */
4454 void
4455 glsl_to_tgsi_visitor::simplify_cmp(void)
4456 {
4457 int tempWritesSize = 0;
4458 unsigned *tempWrites = NULL;
4459 unsigned outputWrites[VARYING_SLOT_TESS_MAX];
4460
4461 memset(outputWrites, 0, sizeof(outputWrites));
4462
4463 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
4464 unsigned prevWriteMask = 0;
4465
4466 /* Give up if we encounter relative addressing or flow control. */
4467 if (inst->dst[0].reladdr || inst->dst[0].reladdr2 ||
4468 inst->dst[1].reladdr || inst->dst[1].reladdr2 ||
4469 tgsi_get_opcode_info(inst->op)->is_branch ||
4470 inst->op == TGSI_OPCODE_CONT ||
4471 inst->op == TGSI_OPCODE_END ||
4472 inst->op == TGSI_OPCODE_RET) {
4473 break;
4474 }
4475
4476 if (inst->dst[0].file == PROGRAM_OUTPUT) {
4477 assert(inst->dst[0].index < (signed)ARRAY_SIZE(outputWrites));
4478 prevWriteMask = outputWrites[inst->dst[0].index];
4479 outputWrites[inst->dst[0].index] |= inst->dst[0].writemask;
4480 } else if (inst->dst[0].file == PROGRAM_TEMPORARY) {
4481 if (inst->dst[0].index >= tempWritesSize) {
4482 const int inc = 4096;
4483
4484 tempWrites = (unsigned*)
4485 realloc(tempWrites,
4486 (tempWritesSize + inc) * sizeof(unsigned));
4487 if (!tempWrites)
4488 return;
4489
4490 memset(tempWrites + tempWritesSize, 0, inc * sizeof(unsigned));
4491 tempWritesSize += inc;
4492 }
4493
4494 prevWriteMask = tempWrites[inst->dst[0].index];
4495 tempWrites[inst->dst[0].index] |= inst->dst[0].writemask;
4496 } else
4497 continue;
4498
4499 /* For a CMP to be considered a conditional write, the destination
4500 * register and source register two must be the same. */
4501 if (inst->op == TGSI_OPCODE_CMP
4502 && !(inst->dst[0].writemask & prevWriteMask)
4503 && inst->src[2].file == inst->dst[0].file
4504 && inst->src[2].index == inst->dst[0].index
4505 && inst->dst[0].writemask == get_src_arg_mask(inst->dst[0], inst->src[2])) {
4506
4507 inst->op = TGSI_OPCODE_MOV;
4508 inst->info = tgsi_get_opcode_info(inst->op);
4509 inst->src[0] = inst->src[1];
4510 }
4511 }
4512
4513 free(tempWrites);
4514 }
4515
4516 /* Replaces all references to a temporary register index with another index. */
4517 void
4518 glsl_to_tgsi_visitor::rename_temp_registers(int num_renames, struct rename_reg_pair *renames)
4519 {
4520 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
4521 unsigned j;
4522 int k;
4523 for (j = 0; j < num_inst_src_regs(inst); j++) {
4524 if (inst->src[j].file == PROGRAM_TEMPORARY)
4525 for (k = 0; k < num_renames; k++)
4526 if (inst->src[j].index == renames[k].old_reg)
4527 inst->src[j].index = renames[k].new_reg;
4528 }
4529
4530 for (j = 0; j < inst->tex_offset_num_offset; j++) {
4531 if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY)
4532 for (k = 0; k < num_renames; k++)
4533 if (inst->tex_offsets[j].index == renames[k].old_reg)
4534 inst->tex_offsets[j].index = renames[k].new_reg;
4535 }
4536
4537 for (j = 0; j < num_inst_dst_regs(inst); j++) {
4538 if (inst->dst[j].file == PROGRAM_TEMPORARY)
4539 for (k = 0; k < num_renames; k++)
4540 if (inst->dst[j].index == renames[k].old_reg)
4541 inst->dst[j].index = renames[k].new_reg;
4542 }
4543 }
4544 }
4545
4546 void
4547 glsl_to_tgsi_visitor::get_first_temp_read(int *first_reads)
4548 {
4549 int depth = 0; /* loop depth */
4550 int loop_start = -1; /* index of the first active BGNLOOP (if any) */
4551 unsigned i = 0, j;
4552
4553 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
4554 for (j = 0; j < num_inst_src_regs(inst); j++) {
4555 if (inst->src[j].file == PROGRAM_TEMPORARY) {
4556 if (first_reads[inst->src[j].index] == -1)
4557 first_reads[inst->src[j].index] = (depth == 0) ? i : loop_start;
4558 }
4559 }
4560 for (j = 0; j < inst->tex_offset_num_offset; j++) {
4561 if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) {
4562 if (first_reads[inst->tex_offsets[j].index] == -1)
4563 first_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : loop_start;
4564 }
4565 }
4566 if (inst->op == TGSI_OPCODE_BGNLOOP) {
4567 if(depth++ == 0)
4568 loop_start = i;
4569 } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
4570 if (--depth == 0)
4571 loop_start = -1;
4572 }
4573 assert(depth >= 0);
4574 i++;
4575 }
4576 }
4577
/* Record, per temporary, the instruction index of its last read and of its
 * first write.  Both arrays must be pre-filled with -1.  Reads inside a
 * loop use the sentinel -2 until the outermost ENDLOOP, where they are
 * extended to the loop's end; first writes inside a loop are credited to
 * the loop header, since the body may execute repeatedly. */
void
glsl_to_tgsi_visitor::get_last_temp_read_first_temp_write(int *last_reads, int *first_writes)
{
   int depth = 0; /* loop depth */
   int loop_start = -1; /* index of the first active BGNLOOP (if any) */
   unsigned i = 0, j;
   int k;
   foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
      for (j = 0; j < num_inst_src_regs(inst); j++) {
         if (inst->src[j].file == PROGRAM_TEMPORARY)
            /* -2 = "read inside a loop"; fixed up at the outer ENDLOOP. */
            last_reads[inst->src[j].index] = (depth == 0) ? i : -2;
      }
      for (j = 0; j < num_inst_dst_regs(inst); j++) {
         if (inst->dst[j].file == PROGRAM_TEMPORARY) {
            if (first_writes[inst->dst[j].index] == -1)
               first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start;
            /* A write also extends the live range like a read does. */
            last_reads[inst->dst[j].index] = (depth == 0) ? i : -2;
         }
      }
      for (j = 0; j < inst->tex_offset_num_offset; j++) {
         if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY)
            last_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : -2;
      }
      if (inst->op == TGSI_OPCODE_BGNLOOP) {
         if(depth++ == 0)
            loop_start = i;
      } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
         if (--depth == 0) {
            loop_start = -1;
            /* Outermost loop closed: everything read inside it is live
             * through this instruction. */
            for (k = 0; k < this->next_temp; k++) {
               if (last_reads[k] == -2) {
                  last_reads[k] = i;
               }
            }
         }
      }
      assert(depth >= 0);
      i++;
   }
}
4618
4619 void
4620 glsl_to_tgsi_visitor::get_last_temp_write(int *last_writes)
4621 {
4622 int depth = 0; /* loop depth */
4623 int i = 0, k;
4624 unsigned j;
4625
4626 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
4627 for (j = 0; j < num_inst_dst_regs(inst); j++) {
4628 if (inst->dst[j].file == PROGRAM_TEMPORARY)
4629 last_writes[inst->dst[j].index] = (depth == 0) ? i : -2;
4630 }
4631
4632 if (inst->op == TGSI_OPCODE_BGNLOOP)
4633 depth++;
4634 else if (inst->op == TGSI_OPCODE_ENDLOOP)
4635 if (--depth == 0) {
4636 for (k = 0; k < this->next_temp; k++) {
4637 if (last_writes[k] == -2) {
4638 last_writes[k] = i;
4639 }
4640 }
4641 }
4642 assert(depth >= 0);
4643 i++;
4644 }
4645 }
4646
/*
 * On a basic block basis, tracks available PROGRAM_TEMPORARY register
 * channels for copy propagation and updates following instructions to
 * use the original versions.
 *
 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
 * will occur. As an example, a TXP production before this pass:
 *
 * 0: MOV TEMP[1], INPUT[4].xyyy;
 * 1: MOV TEMP[1].w, INPUT[4].wwww;
 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
 *
 * and after:
 *
 * 0: MOV TEMP[1], INPUT[4].xyyy;
 * 1: MOV TEMP[1].w, INPUT[4].wwww;
 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
 *
 * which allows for dead code elimination on TEMP[1]'s writes.
 */
void
glsl_to_tgsi_visitor::copy_propagate(void)
{
   /* acp ("available copy propagation") holds, per temp channel, the MOV
    * that last wrote it; acp_level records the if/else nesting level of
    * that MOV so entries can be invalidated when a block closes. */
   glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
                                                  glsl_to_tgsi_instruction *,
                                                  this->next_temp * 4);
   int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
   int level = 0;

   foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
      assert(inst->dst[0].file != PROGRAM_TEMPORARY
             || inst->dst[0].index < this->next_temp);

      /* First, do any copy propagation possible into the src regs. */
      for (int r = 0; r < 3; r++) {
         glsl_to_tgsi_instruction *first = NULL;
         bool good = true;
         int acp_base = inst->src[r].index * 4;

         if (inst->src[r].file != PROGRAM_TEMPORARY ||
             inst->src[r].reladdr ||
             inst->src[r].reladdr2)
            continue;

         /* See if we can find entries in the ACP consisting of MOVs
          * from the same src register for all the swizzled channels
          * of this src register reference.
          */
         for (int i = 0; i < 4; i++) {
            int src_chan = GET_SWZ(inst->src[r].swizzle, i);
            glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];

            if (!copy_chan) {
               good = false;
               break;
            }

            assert(acp_level[acp_base + src_chan] <= level);

            if (!first) {
               first = copy_chan;
            } else {
               /* All channels must originate from the same register for
                * the substitution to be expressible as one swizzle. */
               if (first->src[0].file != copy_chan->src[0].file ||
                   first->src[0].index != copy_chan->src[0].index ||
                   first->src[0].double_reg2 != copy_chan->src[0].double_reg2 ||
                   first->src[0].index2D != copy_chan->src[0].index2D) {
                  good = false;
                  break;
               }
            }
         }

         if (good) {
            /* We've now validated that we can copy-propagate to
             * replace this src register reference. Do it.
             */
            inst->src[r].file = first->src[0].file;
            inst->src[r].index = first->src[0].index;
            inst->src[r].index2D = first->src[0].index2D;
            inst->src[r].has_index2 = first->src[0].has_index2;
            inst->src[r].double_reg2 = first->src[0].double_reg2;
            inst->src[r].array_id = first->src[0].array_id;

            /* Compose our swizzle with the copies' swizzles. */
            int swizzle = 0;
            for (int i = 0; i < 4; i++) {
               int src_chan = GET_SWZ(inst->src[r].swizzle, i);
               glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan];
               swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) << (3 * i));
            }
            inst->src[r].swizzle = swizzle;
         }
      }

      switch (inst->op) {
      case TGSI_OPCODE_BGNLOOP:
      case TGSI_OPCODE_ENDLOOP:
         /* End of a basic block, clear the ACP entirely. */
         memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
         break;

      case TGSI_OPCODE_IF:
      case TGSI_OPCODE_UIF:
         ++level;
         break;

      case TGSI_OPCODE_ENDIF:
      case TGSI_OPCODE_ELSE:
         /* Clear all channels written inside the block from the ACP, but
          * leaving those that were not touched.
          */
         for (int r = 0; r < this->next_temp; r++) {
            for (int c = 0; c < 4; c++) {
               if (!acp[4 * r + c])
                  continue;

               if (acp_level[4 * r + c] >= level)
                  acp[4 * r + c] = NULL;
            }
         }
         if (inst->op == TGSI_OPCODE_ENDIF)
            --level;
         break;

      default:
         /* Continuing the block, clear any written channels from
          * the ACP.
          */
         for (int d = 0; d < 2; d++) {
            if (inst->dst[d].file == PROGRAM_TEMPORARY && inst->dst[d].reladdr) {
               /* Any temporary might be written, so no copy propagation
                * across this instruction.
                */
               memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
            } else if (inst->dst[d].file == PROGRAM_OUTPUT &&
                       inst->dst[d].reladdr) {
               /* Any output might be written, so no copy propagation
                * from outputs across this instruction.
                */
               for (int r = 0; r < this->next_temp; r++) {
                  for (int c = 0; c < 4; c++) {
                     if (!acp[4 * r + c])
                        continue;

                     if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
                        acp[4 * r + c] = NULL;
                  }
               }
            } else if (inst->dst[d].file == PROGRAM_TEMPORARY ||
                       inst->dst[d].file == PROGRAM_OUTPUT) {
               /* Clear where it's used as dst. */
               if (inst->dst[d].file == PROGRAM_TEMPORARY) {
                  for (int c = 0; c < 4; c++) {
                     if (inst->dst[d].writemask & (1 << c))
                        acp[4 * inst->dst[d].index + c] = NULL;
                  }
               }

               /* Clear where it's used as src. */
               for (int r = 0; r < this->next_temp; r++) {
                  for (int c = 0; c < 4; c++) {
                     if (!acp[4 * r + c])
                        continue;

                     int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);

                     if (acp[4 * r + c]->src[0].file == inst->dst[d].file &&
                         acp[4 * r + c]->src[0].index == inst->dst[d].index &&
                         inst->dst[d].writemask & (1 << src_chan)) {
                        acp[4 * r + c] = NULL;
                     }
                  }
               }
            }
         }
         break;
      }

      /* If this is a copy, add it to the ACP. */
      if (inst->op == TGSI_OPCODE_MOV &&
          inst->dst[0].file == PROGRAM_TEMPORARY &&
          !(inst->dst[0].file == inst->src[0].file &&
            inst->dst[0].index == inst->src[0].index) &&
          !inst->dst[0].reladdr &&
          !inst->dst[0].reladdr2 &&
          !inst->saturate &&
          inst->src[0].file != PROGRAM_ARRAY &&
          !inst->src[0].reladdr &&
          !inst->src[0].reladdr2 &&
          !inst->src[0].negate) {
         for (int i = 0; i < 4; i++) {
            if (inst->dst[0].writemask & (1 << i)) {
               acp[4 * inst->dst[0].index + i] = inst;
               acp_level[4 * inst->dst[0].index + i] = level;
            }
         }
      }
   }

   ralloc_free(acp_level);
   ralloc_free(acp);
}
4848
/*
 * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead
 * code elimination.
 *
 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
 * will occur. As an example, a TXP production after copy propagation but
 * before this pass:
 *
 * 0: MOV TEMP[1], INPUT[4].xyyy;
 * 1: MOV TEMP[1].w, INPUT[4].wwww;
 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
 *
 * and after this pass:
 *
 * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
 *
 * Returns the number of instructions removed.
 */
int
glsl_to_tgsi_visitor::eliminate_dead_code(void)
{
   /* writes holds, per temp channel, the instruction that last wrote it
    * without an intervening read; write_level records the if/else nesting
    * level of that write. */
   glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
                                                     glsl_to_tgsi_instruction *,
                                                     this->next_temp * 4);
   int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
   int level = 0;
   int removed = 0;

   foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
      assert(inst->dst[0].file != PROGRAM_TEMPORARY
             || inst->dst[0].index < this->next_temp);

      switch (inst->op) {
      case TGSI_OPCODE_BGNLOOP:
      case TGSI_OPCODE_ENDLOOP:
      case TGSI_OPCODE_CONT:
      case TGSI_OPCODE_BRK:
         /* End of a basic block, clear the write array entirely.
          *
          * This keeps us from killing dead code when the writes are
          * on either side of a loop, even when the register isn't touched
          * inside the loop. However, glsl_to_tgsi_visitor doesn't seem to emit
          * dead code of this type, so it shouldn't make a difference as long as
          * the dead code elimination pass in the GLSL compiler does its job.
          */
         memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
         break;

      case TGSI_OPCODE_ENDIF:
      case TGSI_OPCODE_ELSE:
         /* Promote the recorded level of all channels written inside the
          * preceding if or else block to the level above the if/else block.
          */
         for (int r = 0; r < this->next_temp; r++) {
            for (int c = 0; c < 4; c++) {
               if (!writes[4 * r + c])
                  continue;

               if (write_level[4 * r + c] == level)
                  write_level[4 * r + c] = level-1;
            }
         }
         if(inst->op == TGSI_OPCODE_ENDIF)
            --level;
         break;

      case TGSI_OPCODE_IF:
      case TGSI_OPCODE_UIF:
         ++level;
         /* fallthrough to default case to mark the condition as read */
      default:
         /* Continuing the block, clear any channels from the write array that
          * are read by this instruction.
          */
         for (unsigned i = 0; i < ARRAY_SIZE(inst->src); i++) {
            if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){
               /* Any temporary might be read, so no dead code elimination
                * across this instruction.
                */
               memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
            } else if (inst->src[i].file == PROGRAM_TEMPORARY) {
               /* Clear where it's used as src. */
               int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0);
               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1);
               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2);
               src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3);

               for (int c = 0; c < 4; c++) {
                  if (src_chans & (1 << c))
                     writes[4 * inst->src[i].index + c] = NULL;
               }
            }
         }
         /* Texture offsets read temporaries too. */
         for (unsigned i = 0; i < inst->tex_offset_num_offset; i++) {
            if (inst->tex_offsets[i].file == PROGRAM_TEMPORARY && inst->tex_offsets[i].reladdr){
               /* Any temporary might be read, so no dead code elimination
                * across this instruction.
                */
               memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
            } else if (inst->tex_offsets[i].file == PROGRAM_TEMPORARY) {
               /* Clear where it's used as src. */
               int src_chans = 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 0);
               src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 1);
               src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 2);
               src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 3);

               for (int c = 0; c < 4; c++) {
                  if (src_chans & (1 << c))
                     writes[4 * inst->tex_offsets[i].index + c] = NULL;
               }
            }
         }
         break;
      }

      /* If this instruction writes to a temporary, add it to the write array.
       * If there is already an instruction in the write array for one or more
       * of the channels, flag that channel write as dead.
       */
      for (unsigned i = 0; i < ARRAY_SIZE(inst->dst); i++) {
         if (inst->dst[i].file == PROGRAM_TEMPORARY &&
             !inst->dst[i].reladdr) {
            for (int c = 0; c < 4; c++) {
               if (inst->dst[i].writemask & (1 << c)) {
                  if (writes[4 * inst->dst[i].index + c]) {
                     /* Only overwrites at the same (or deeper) nesting
                      * level prove the earlier write dead. */
                     if (write_level[4 * inst->dst[i].index + c] < level)
                        continue;
                     else
                        writes[4 * inst->dst[i].index + c]->dead_mask |= (1 << c);
                  }
                  writes[4 * inst->dst[i].index + c] = inst;
                  write_level[4 * inst->dst[i].index + c] = level;
               }
            }
         }
      }
   }

   /* Anything still in the write array at this point is dead code. */
   for (int r = 0; r < this->next_temp; r++) {
      for (int c = 0; c < 4; c++) {
         glsl_to_tgsi_instruction *inst = writes[4 * r + c];
         if (inst)
            inst->dead_mask |= (1 << c);
      }
   }

   /* Now actually remove the instructions that are completely dead and update
    * the writemask of other instructions with dead channels.
    */
   foreach_in_list_safe(glsl_to_tgsi_instruction, inst, &this->instructions) {
      if (!inst->dead_mask || !inst->dst[0].writemask)
         continue;
      /* No amount of dead masks should remove memory stores */
      if (inst->info->is_store)
         continue;

      if ((inst->dst[0].writemask & ~inst->dead_mask) == 0) {
         inst->remove();
         delete inst;
         removed++;
      } else {
         /* 64-bit values occupy channel pairs; only drop whole pairs. */
         if (glsl_base_type_is_64bit(inst->dst[0].type)) {
            if (inst->dead_mask == WRITEMASK_XY ||
                inst->dead_mask == WRITEMASK_ZW)
               inst->dst[0].writemask &= ~(inst->dead_mask);
         } else
            inst->dst[0].writemask &= ~(inst->dead_mask);
      }
   }

   ralloc_free(write_level);
   ralloc_free(writes);

   return removed;
}
5023
/* merge DFRACEXP instructions into one.
 *
 * The IR emits the two destinations of a dual-dst opcode as two separate
 * instructions with the same source, each with one dst slot defined and
 * the other PROGRAM_UNDEFINED; this pass pairs them back up and deletes
 * the partner.
 */
void
glsl_to_tgsi_visitor::merge_two_dsts(void)
{
   foreach_in_list_safe(glsl_to_tgsi_instruction, inst, &this->instructions) {
      glsl_to_tgsi_instruction *inst2;
      bool merged;
      if (num_inst_dst_regs(inst) != 2)
         continue;

      /* Already has both destinations — nothing to merge. */
      if (inst->dst[0].file != PROGRAM_UNDEFINED &&
          inst->dst[1].file != PROGRAM_UNDEFINED)
         continue;

      /* Scan forward for the partner instruction reading the same src.
       * NOTE(review): the loop dereferences inst2 before checking for the
       * list end — this appears to rely on a matching partner existing
       * before the exec_list tail sentinel; confirm against emit order. */
      inst2 = (glsl_to_tgsi_instruction *) inst->next;
      do {

         if (inst->src[0].file == inst2->src[0].file &&
             inst->src[0].index == inst2->src[0].index &&
             inst->src[0].type == inst2->src[0].type &&
             inst->src[0].swizzle == inst2->src[0].swizzle)
            break;
         inst2 = (glsl_to_tgsi_instruction *) inst2->next;
      } while (inst2);

      if (!inst2)
         continue;
      merged = false;
      /* Take whichever dst slot this instruction is missing from the
       * partner, then drop the partner. */
      if (inst->dst[0].file == PROGRAM_UNDEFINED) {
         merged = true;
         inst->dst[0] = inst2->dst[0];
      } else if (inst->dst[1].file == PROGRAM_UNDEFINED) {
         inst->dst[1] = inst2->dst[1];
         merged = true;
      }

      if (merged) {
         inst2->remove();
         delete inst2;
      }
   }
}
5066
5067 /* Merges temporary registers together where possible to reduce the number of
5068 * registers needed to run a program.
5069 *
5070 * Produces optimal code only after copy propagation and dead code elimination
5071 * have been run. */
5072 void
5073 glsl_to_tgsi_visitor::merge_registers(void)
5074 {
5075 int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp);
5076 int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp);
5077 struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp);
5078 int i, j;
5079 int num_renames = 0;
5080
5081 /* Read the indices of the last read and first write to each temp register
5082 * into an array so that we don't have to traverse the instruction list as
5083 * much. */
5084 for (i = 0; i < this->next_temp; i++) {
5085 last_reads[i] = -1;
5086 first_writes[i] = -1;
5087 }
5088 get_last_temp_read_first_temp_write(last_reads, first_writes);
5089
5090 /* Start looking for registers with non-overlapping usages that can be
5091 * merged together. */
5092 for (i = 0; i < this->next_temp; i++) {
5093 /* Don't touch unused registers. */
5094 if (last_reads[i] < 0 || first_writes[i] < 0) continue;
5095
5096 for (j = 0; j < this->next_temp; j++) {
5097 /* Don't touch unused registers. */
5098 if (last_reads[j] < 0 || first_writes[j] < 0) continue;
5099
5100 /* We can merge the two registers if the first write to j is after or
5101 * in the same instruction as the last read from i. Note that the
5102 * register at index i will always be used earlier or at the same time
5103 * as the register at index j. */
5104 if (first_writes[i] <= first_writes[j] &&
5105 last_reads[i] <= first_writes[j]) {
5106 renames[num_renames].old_reg = j;
5107 renames[num_renames].new_reg = i;
5108 num_renames++;
5109
5110 /* Update the first_writes and last_reads arrays with the new
5111 * values for the merged register index, and mark the newly unused
5112 * register index as such. */
5113 assert(last_reads[j] >= last_reads[i]);
5114 last_reads[i] = last_reads[j];
5115 first_writes[j] = -1;
5116 last_reads[j] = -1;
5117 }
5118 }
5119 }
5120
5121 rename_temp_registers(num_renames, renames);
5122 ralloc_free(renames);
5123 ralloc_free(last_reads);
5124 ralloc_free(first_writes);
5125 }
5126
5127 /* Reassign indices to temporary registers by reusing unused indices created
5128 * by optimization passes. */
5129 void
5130 glsl_to_tgsi_visitor::renumber_registers(void)
5131 {
5132 int i = 0;
5133 int new_index = 0;
5134 int *first_reads = rzalloc_array(mem_ctx, int, this->next_temp);
5135 struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp);
5136 int num_renames = 0;
5137 for (i = 0; i < this->next_temp; i++) {
5138 first_reads[i] = -1;
5139 }
5140 get_first_temp_read(first_reads);
5141
5142 for (i = 0; i < this->next_temp; i++) {
5143 if (first_reads[i] < 0) continue;
5144 if (i != new_index) {
5145 renames[num_renames].old_reg = i;
5146 renames[num_renames].new_reg = new_index;
5147 num_renames++;
5148 }
5149 new_index++;
5150 }
5151
5152 rename_temp_registers(num_renames, renames);
5153 this->next_temp = new_index;
5154 ralloc_free(renames);
5155 ralloc_free(first_reads);
5156 }
5157
/* ------------------------- TGSI conversion stuff -------------------------- */

/**
 * Intermediate state used during shader translation.
 */
struct st_translate {
   struct ureg_program *ureg;   /**< TGSI program under construction */

   /* Temporaries; grown on demand in dst_register(). temps_size is the
    * number of allocated slots, individual temps are declared lazily. */
   unsigned temps_size;
   struct ureg_dst *temps;

   struct ureg_dst *arrays;     /**< array temps, declared on first use */
   unsigned num_temp_arrays;
   struct ureg_src *constants;
   int num_constants;
   struct ureg_src *immediates;
   int num_immediates;
   struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
   struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
   struct ureg_dst address[3];  /**< address regs for indirect addressing */
   struct ureg_src samplers[PIPE_MAX_SAMPLERS];
   struct ureg_src buffers[PIPE_MAX_SHADER_BUFFERS];
   struct ureg_src images[PIPE_MAX_SHADER_IMAGES];
   struct ureg_src systemValues[SYSTEM_VALUE_MAX];
   struct ureg_src shared_memory;  /**< resource for PROGRAM_MEMORY accesses */
   unsigned *array_sizes;       /**< per-array temp sizes, indexed by array */
   struct inout_decl *input_decls;
   unsigned num_input_decls;
   struct inout_decl *output_decls;
   unsigned num_output_decls;

   /* Mesa attribute/varying index -> TGSI slot */
   const GLuint *inputMapping;
   const GLuint *outputMapping;

   unsigned procType;  /**< PIPE_SHADER_VERTEX/FRAGMENT */
};
5194
5195 /** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */
5196 unsigned
5197 _mesa_sysval_to_semantic(unsigned sysval)
5198 {
5199 switch (sysval) {
5200 /* Vertex shader */
5201 case SYSTEM_VALUE_VERTEX_ID:
5202 return TGSI_SEMANTIC_VERTEXID;
5203 case SYSTEM_VALUE_INSTANCE_ID:
5204 return TGSI_SEMANTIC_INSTANCEID;
5205 case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
5206 return TGSI_SEMANTIC_VERTEXID_NOBASE;
5207 case SYSTEM_VALUE_BASE_VERTEX:
5208 return TGSI_SEMANTIC_BASEVERTEX;
5209 case SYSTEM_VALUE_BASE_INSTANCE:
5210 return TGSI_SEMANTIC_BASEINSTANCE;
5211 case SYSTEM_VALUE_DRAW_ID:
5212 return TGSI_SEMANTIC_DRAWID;
5213
5214 /* Geometry shader */
5215 case SYSTEM_VALUE_INVOCATION_ID:
5216 return TGSI_SEMANTIC_INVOCATIONID;
5217
5218 /* Fragment shader */
5219 case SYSTEM_VALUE_FRAG_COORD:
5220 return TGSI_SEMANTIC_POSITION;
5221 case SYSTEM_VALUE_FRONT_FACE:
5222 return TGSI_SEMANTIC_FACE;
5223 case SYSTEM_VALUE_SAMPLE_ID:
5224 return TGSI_SEMANTIC_SAMPLEID;
5225 case SYSTEM_VALUE_SAMPLE_POS:
5226 return TGSI_SEMANTIC_SAMPLEPOS;
5227 case SYSTEM_VALUE_SAMPLE_MASK_IN:
5228 return TGSI_SEMANTIC_SAMPLEMASK;
5229 case SYSTEM_VALUE_HELPER_INVOCATION:
5230 return TGSI_SEMANTIC_HELPER_INVOCATION;
5231
5232 /* Tessellation shader */
5233 case SYSTEM_VALUE_TESS_COORD:
5234 return TGSI_SEMANTIC_TESSCOORD;
5235 case SYSTEM_VALUE_VERTICES_IN:
5236 return TGSI_SEMANTIC_VERTICESIN;
5237 case SYSTEM_VALUE_PRIMITIVE_ID:
5238 return TGSI_SEMANTIC_PRIMID;
5239 case SYSTEM_VALUE_TESS_LEVEL_OUTER:
5240 return TGSI_SEMANTIC_TESSOUTER;
5241 case SYSTEM_VALUE_TESS_LEVEL_INNER:
5242 return TGSI_SEMANTIC_TESSINNER;
5243
5244 /* Compute shader */
5245 case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
5246 return TGSI_SEMANTIC_THREAD_ID;
5247 case SYSTEM_VALUE_WORK_GROUP_ID:
5248 return TGSI_SEMANTIC_BLOCK_ID;
5249 case SYSTEM_VALUE_NUM_WORK_GROUPS:
5250 return TGSI_SEMANTIC_GRID_SIZE;
5251 case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
5252 return TGSI_SEMANTIC_BLOCK_SIZE;
5253
5254 /* Unhandled */
5255 case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
5256 case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
5257 case SYSTEM_VALUE_VERTEX_CNT:
5258 default:
5259 assert(!"Unexpected SYSTEM_VALUE_ enum");
5260 return TGSI_SEMANTIC_COUNT;
5261 }
5262 }
5263
5264 /**
5265 * Map a glsl_to_tgsi constant/immediate to a TGSI immediate.
5266 */
5267 static struct ureg_src
5268 emit_immediate(struct st_translate *t,
5269 gl_constant_value values[4],
5270 int type, int size)
5271 {
5272 struct ureg_program *ureg = t->ureg;
5273
5274 switch(type)
5275 {
5276 case GL_FLOAT:
5277 return ureg_DECL_immediate(ureg, &values[0].f, size);
5278 case GL_DOUBLE:
5279 return ureg_DECL_immediate_f64(ureg, (double *)&values[0].f, size);
5280 case GL_INT:
5281 return ureg_DECL_immediate_int(ureg, &values[0].i, size);
5282 case GL_UNSIGNED_INT:
5283 case GL_BOOL:
5284 return ureg_DECL_immediate_uint(ureg, &values[0].u, size);
5285 default:
5286 assert(!"should not get here - type must be float, int, uint, or bool");
5287 return ureg_src_undef();
5288 }
5289 }
5290
5291 /**
5292 * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register.
5293 */
5294 static struct ureg_dst
5295 dst_register(struct st_translate *t, gl_register_file file, unsigned index,
5296 unsigned array_id)
5297 {
5298 unsigned array;
5299
5300 switch(file) {
5301 case PROGRAM_UNDEFINED:
5302 return ureg_dst_undef();
5303
5304 case PROGRAM_TEMPORARY:
5305 /* Allocate space for temporaries on demand. */
5306 if (index >= t->temps_size) {
5307 const int inc = align(index - t->temps_size + 1, 4096);
5308
5309 t->temps = (struct ureg_dst*)
5310 realloc(t->temps,
5311 (t->temps_size + inc) * sizeof(struct ureg_dst));
5312 if (!t->temps)
5313 return ureg_dst_undef();
5314
5315 memset(t->temps + t->temps_size, 0, inc * sizeof(struct ureg_dst));
5316 t->temps_size += inc;
5317 }
5318
5319 if (ureg_dst_is_undef(t->temps[index]))
5320 t->temps[index] = ureg_DECL_local_temporary(t->ureg);
5321
5322 return t->temps[index];
5323
5324 case PROGRAM_ARRAY:
5325 assert(array_id && array_id <= t->num_temp_arrays);
5326 array = array_id - 1;
5327
5328 if (ureg_dst_is_undef(t->arrays[array]))
5329 t->arrays[array] = ureg_DECL_array_temporary(
5330 t->ureg, t->array_sizes[array], TRUE);
5331
5332 return ureg_dst_array_offset(t->arrays[array], index);
5333
5334 case PROGRAM_OUTPUT:
5335 if (!array_id) {
5336 if (t->procType == PIPE_SHADER_FRAGMENT)
5337 assert(index < 2 * FRAG_RESULT_MAX);
5338 else if (t->procType == PIPE_SHADER_TESS_CTRL ||
5339 t->procType == PIPE_SHADER_TESS_EVAL)
5340 assert(index < VARYING_SLOT_TESS_MAX);
5341 else
5342 assert(index < VARYING_SLOT_MAX);
5343
5344 assert(t->outputMapping[index] < ARRAY_SIZE(t->outputs));
5345 assert(t->outputs[t->outputMapping[index]].File != TGSI_FILE_NULL);
5346 return t->outputs[t->outputMapping[index]];
5347 }
5348 else {
5349 struct inout_decl *decl = find_inout_array(t->output_decls, t->num_output_decls, array_id);
5350 unsigned mesa_index = decl->mesa_index;
5351 int slot = t->outputMapping[mesa_index];
5352
5353 assert(slot != -1 && t->outputs[slot].File == TGSI_FILE_OUTPUT);
5354
5355 struct ureg_dst dst = t->outputs[slot];
5356 dst.ArrayID = array_id;
5357 return ureg_dst_array_offset(dst, index - mesa_index);
5358 }
5359
5360 case PROGRAM_ADDRESS:
5361 return t->address[index];
5362
5363 default:
5364 assert(!"unknown dst register file");
5365 return ureg_dst_undef();
5366 }
5367 }
5368
/**
 * Map a glsl_to_tgsi src register to a TGSI ureg_src register.
 */
static struct ureg_src
src_register(struct st_translate *t, const st_src_reg *reg)
{
   int index = reg->index;
   /* 1 when this register is the second (upper) half of a 64-bit input
    * that spans two slots, 0 otherwise. */
   int double_reg2 = reg->double_reg2 ? 1 : 0;

   switch(reg->file) {
   case PROGRAM_UNDEFINED:
      return ureg_imm4f(t->ureg, 0, 0, 0, 0);

   case PROGRAM_TEMPORARY:
   case PROGRAM_ARRAY:
   case PROGRAM_OUTPUT:
      /* These files share the lazy-declaration logic in dst_register(). */
      return ureg_src(dst_register(t, reg->file, reg->index, reg->array_id));

   case PROGRAM_UNIFORM:
      assert(reg->index >= 0);
      /* Out-of-range index falls back to a zero immediate. */
      return reg->index < t->num_constants ?
               t->constants[reg->index] : ureg_imm4f(t->ureg, 0, 0, 0, 0);
   case PROGRAM_STATE_VAR:
   case PROGRAM_CONSTANT:       /* ie, immediate */
      if (reg->has_index2)
         /* 2D-indexed constant (e.g. indexed constant buffer). */
         return ureg_src_register(TGSI_FILE_CONSTANT, reg->index);
      else
         return reg->index >= 0 && reg->index < t->num_constants ?
                  t->constants[reg->index] : ureg_imm4f(t->ureg, 0, 0, 0, 0);

   case PROGRAM_IMMEDIATE:
      assert(reg->index >= 0 && reg->index < t->num_immediates);
      return t->immediates[reg->index];

   case PROGRAM_INPUT:
      /* GLSL inputs are 64-bit containers, so we have to
       * map back to the original index and add the offset after
       * mapping. */
      index -= double_reg2;
      if (!reg->array_id) {
         assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs));
         assert(t->inputs[t->inputMapping[index]].File != TGSI_FILE_NULL);
         return t->inputs[t->inputMapping[index] + double_reg2];
      }
      else {
         /* Input arrays are addressed relative to the first slot of the
          * declaration they belong to. */
         struct inout_decl *decl = find_inout_array(t->input_decls, t->num_input_decls, reg->array_id);
         unsigned mesa_index = decl->mesa_index;
         int slot = t->inputMapping[mesa_index];

         assert(slot != -1 && t->inputs[slot].File == TGSI_FILE_INPUT);

         struct ureg_src src = t->inputs[slot];
         src.ArrayID = reg->array_id;
         return ureg_src_array_offset(src, index + double_reg2 - mesa_index);
      }

   case PROGRAM_ADDRESS:
      return ureg_src(t->address[reg->index]);

   case PROGRAM_SYSTEM_VALUE:
      assert(reg->index < (int) ARRAY_SIZE(t->systemValues));
      return t->systemValues[reg->index];

   default:
      assert(!"unknown src register file");
      return ureg_src_undef();
   }
}
5437
5438 /**
5439 * Create a TGSI ureg_dst register from an st_dst_reg.
5440 */
5441 static struct ureg_dst
5442 translate_dst(struct st_translate *t,
5443 const st_dst_reg *dst_reg,
5444 bool saturate)
5445 {
5446 struct ureg_dst dst = dst_register(t, dst_reg->file, dst_reg->index,
5447 dst_reg->array_id);
5448
5449 if (dst.File == TGSI_FILE_NULL)
5450 return dst;
5451
5452 dst = ureg_writemask(dst, dst_reg->writemask);
5453
5454 if (saturate)
5455 dst = ureg_saturate(dst);
5456
5457 if (dst_reg->reladdr != NULL) {
5458 assert(dst_reg->file != PROGRAM_TEMPORARY);
5459 dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
5460 }
5461
5462 if (dst_reg->has_index2) {
5463 if (dst_reg->reladdr2)
5464 dst = ureg_dst_dimension_indirect(dst, ureg_src(t->address[1]),
5465 dst_reg->index2D);
5466 else
5467 dst = ureg_dst_dimension(dst, dst_reg->index2D);
5468 }
5469
5470 return dst;
5471 }
5472
5473 /**
5474 * Create a TGSI ureg_src register from an st_src_reg.
5475 */
5476 static struct ureg_src
5477 translate_src(struct st_translate *t, const st_src_reg *src_reg)
5478 {
5479 struct ureg_src src = src_register(t, src_reg);
5480
5481 if (src_reg->has_index2) {
5482 /* 2D indexes occur with geometry shader inputs (attrib, vertex)
5483 * and UBO constant buffers (buffer, position).
5484 */
5485 if (src_reg->reladdr2)
5486 src = ureg_src_dimension_indirect(src, ureg_src(t->address[1]),
5487 src_reg->index2D);
5488 else
5489 src = ureg_src_dimension(src, src_reg->index2D);
5490 }
5491
5492 src = ureg_swizzle(src,
5493 GET_SWZ(src_reg->swizzle, 0) & 0x3,
5494 GET_SWZ(src_reg->swizzle, 1) & 0x3,
5495 GET_SWZ(src_reg->swizzle, 2) & 0x3,
5496 GET_SWZ(src_reg->swizzle, 3) & 0x3);
5497
5498 if ((src_reg->negate & 0xf) == NEGATE_XYZW)
5499 src = ureg_negate(src);
5500
5501 if (src_reg->reladdr != NULL) {
5502 assert(src_reg->file != PROGRAM_TEMPORARY);
5503 src = ureg_src_indirect(src, ureg_src(t->address[0]));
5504 }
5505
5506 return src;
5507 }
5508
5509 static struct tgsi_texture_offset
5510 translate_tex_offset(struct st_translate *t,
5511 const st_src_reg *in_offset)
5512 {
5513 struct tgsi_texture_offset offset;
5514 struct ureg_src src = translate_src(t, in_offset);
5515
5516 offset.File = src.File;
5517 offset.Index = src.Index;
5518 offset.SwizzleX = src.SwizzleX;
5519 offset.SwizzleY = src.SwizzleY;
5520 offset.SwizzleZ = src.SwizzleZ;
5521 offset.Padding = 0;
5522
5523 assert(!src.Indirect);
5524 assert(!src.DimIndirect);
5525 assert(!src.Dimension);
5526 assert(!src.Absolute); /* those shouldn't be used with integers anyway */
5527 assert(!src.Negate);
5528
5529 return offset;
5530 }
5531
/* Translate one glsl_to_tgsi_instruction into a TGSI instruction, routing
 * it through the right ureg emission helper for its opcode class
 * (control flow, texture, memory/atomic, store, or plain ALU). */
static void
compile_tgsi_instruction(struct st_translate *t,
                         const glsl_to_tgsi_instruction *inst)
{
   struct ureg_program *ureg = t->ureg;
   int i;
   struct ureg_dst dst[2];
   struct ureg_src src[4];
   struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET];

   int num_dst;
   int num_src;
   unsigned tex_target = 0;

   num_dst = num_inst_dst_regs(inst);
   num_src = num_inst_src_regs(inst);

   /* Convert all operands up front. */
   for (i = 0; i < num_dst; i++)
      dst[i] = translate_dst(t,
                             &inst->dst[i],
                             inst->saturate);

   for (i = 0; i < num_src; i++)
      src[i] = translate_src(t, &inst->src[i]);

   switch(inst->op) {
   case TGSI_OPCODE_BGNLOOP:
   case TGSI_OPCODE_ELSE:
   case TGSI_OPCODE_ENDLOOP:
   case TGSI_OPCODE_IF:
   case TGSI_OPCODE_UIF:
      /* Control-flow opcodes never write a destination. */
      assert(num_dst == 0);
      ureg_insn(ureg, inst->op, NULL, 0, src, num_src);
      return;

   case TGSI_OPCODE_TEX:
   case TGSI_OPCODE_TXB:
   case TGSI_OPCODE_TXD:
   case TGSI_OPCODE_TXL:
   case TGSI_OPCODE_TXP:
   case TGSI_OPCODE_TXQ:
   case TGSI_OPCODE_TXQS:
   case TGSI_OPCODE_TXF:
   case TGSI_OPCODE_TEX2:
   case TGSI_OPCODE_TXB2:
   case TGSI_OPCODE_TXL2:
   case TGSI_OPCODE_TG4:
   case TGSI_OPCODE_LODQ:
      /* Texture opcodes take the sampler as an extra trailing source,
       * possibly indirectly addressed via address reg 2. */
      src[num_src] = t->samplers[inst->resource.index];
      assert(src[num_src].File != TGSI_FILE_NULL);
      if (inst->resource.reladdr)
         src[num_src] =
            ureg_src_indirect(src[num_src], ureg_src(t->address[2]));
      num_src++;
      for (i = 0; i < (int)inst->tex_offset_num_offset; i++) {
         texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]);
      }
      tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);

      ureg_tex_insn(ureg,
                    inst->op,
                    dst, num_dst,
                    tex_target,
                    texoffsets, inst->tex_offset_num_offset,
                    src, num_src);
      return;

   case TGSI_OPCODE_RESQ:
   case TGSI_OPCODE_LOAD:
   case TGSI_OPCODE_ATOMUADD:
   case TGSI_OPCODE_ATOMXCHG:
   case TGSI_OPCODE_ATOMCAS:
   case TGSI_OPCODE_ATOMAND:
   case TGSI_OPCODE_ATOMOR:
   case TGSI_OPCODE_ATOMXOR:
   case TGSI_OPCODE_ATOMUMIN:
   case TGSI_OPCODE_ATOMUMAX:
   case TGSI_OPCODE_ATOMIMIN:
   case TGSI_OPCODE_ATOMIMAX:
      /* Shift the sources up one slot to make room for the resource
       * (shared memory, buffer or image) as src[0]. */
      for (i = num_src - 1; i >= 0; i--)
         src[i + 1] = src[i];
      num_src++;
      if (inst->resource.file == PROGRAM_MEMORY) {
         src[0] = t->shared_memory;
      } else if (inst->resource.file == PROGRAM_BUFFER) {
         src[0] = t->buffers[inst->resource.index];
      } else {
         /* Images additionally need a texture target. */
         src[0] = t->images[inst->resource.index];
         tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);
      }
      if (inst->resource.reladdr)
         src[0] = ureg_src_indirect(src[0], ureg_src(t->address[2]));
      assert(src[0].File != TGSI_FILE_NULL);
      ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src,
                       inst->buffer_access,
                       tex_target, inst->image_format);
      break;

   case TGSI_OPCODE_STORE:
      /* Stores write the resource, so it becomes dst[0] instead of a src. */
      if (inst->resource.file == PROGRAM_MEMORY) {
         dst[0] = ureg_dst(t->shared_memory);
      } else if (inst->resource.file == PROGRAM_BUFFER) {
         dst[0] = ureg_dst(t->buffers[inst->resource.index]);
      } else {
         dst[0] = ureg_dst(t->images[inst->resource.index]);
         tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);
      }
      dst[0] = ureg_writemask(dst[0], inst->dst[0].writemask);
      if (inst->resource.reladdr)
         dst[0] = ureg_dst_indirect(dst[0], ureg_src(t->address[2]));
      assert(dst[0].File != TGSI_FILE_NULL);
      ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src,
                       inst->buffer_access,
                       tex_target, inst->image_format);
      break;

   case TGSI_OPCODE_SCS:
      /* SCS only defines the x and y channels. */
      dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY);
      ureg_insn(ureg, inst->op, dst, num_dst, src, num_src);
      break;

   default:
      ureg_insn(ureg,
                inst->op,
                dst, num_dst,
                src, num_src);
      break;
   }
}
5661
/**
 * Emit the TGSI instructions for inverting and adjusting WPOS.
 * This code is unavoidable because it also depends on whether
 * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
 *
 * \param wpos_transform_const  index of the state constant holding the
 *                              y-transform scale/bias values
 * \param invert  whether the y coordinate must be flipped
 * \param adjX    pixel-center bias for x
 * \param adjY    pixel-center bias for y; [0] applies without inversion,
 *                [1] applies with inversion
 */
static void
emit_wpos_adjustment(struct gl_context *ctx,
                     struct st_translate *t,
                     int wpos_transform_const,
                     boolean invert,
                     GLfloat adjX, GLfloat adjY[2])
{
   struct ureg_program *ureg = t->ureg;

   assert(wpos_transform_const >= 0);

   /* Fragment program uses fragment position input.
    * Need to replace instances of INPUT[WPOS] with temp T
    * where T = INPUT[WPOS] is inverted by Y.
    */
   struct ureg_src wpostrans = ureg_DECL_constant(ureg, wpos_transform_const);
   struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg );
   /* WPOS may live either in a system value or in a regular input,
    * depending on GLSLFragCoordIsSysVal. */
   struct ureg_src *wpos =
      ctx->Const.GLSLFragCoordIsSysVal ?
         &t->systemValues[SYSTEM_VALUE_FRAG_COORD] :
         &t->inputs[t->inputMapping[VARYING_SLOT_POS]];
   struct ureg_src wpos_input = *wpos;

   /* First, apply the coordinate shift: */
   if (adjX || adjY[0] || adjY[1]) {
      if (adjY[0] != adjY[1]) {
         /* Adjust the y coordinate by adjY[1] or adjY[0] respectively
          * depending on whether inversion is actually going to be applied
          * or not, which is determined by testing against the inversion
          * state variable used below, which will be either +1 or -1.
          */
         struct ureg_dst adj_temp = ureg_DECL_local_temporary(ureg);

         ureg_CMP(ureg, adj_temp,
                  ureg_scalar(wpostrans, invert ? 2 : 0),
                  ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f),
                  ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f));
         ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp));
      } else {
         ureg_ADD(ureg, wpos_temp, wpos_input,
                  ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f));
      }
      wpos_input = ureg_src(wpos_temp);
   } else {
      /* MOV wpos_temp, input[wpos]
       */
      ureg_MOV( ureg, wpos_temp, wpos_input );
   }

   /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be
    * inversion/identity, or the other way around if we're drawing to an FBO.
    */
   if (invert) {
      /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
       */
      ureg_MAD( ureg,
                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
                wpos_input,
                ureg_scalar(wpostrans, 0),
                ureg_scalar(wpostrans, 1));
   } else {
      /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
       */
      ureg_MAD( ureg,
                ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
                wpos_input,
                ureg_scalar(wpostrans, 2),
                ureg_scalar(wpostrans, 3));
   }

   /* Use wpos_temp as position input from here on:
    */
   *wpos = ureg_src(wpos_temp);
}
5741
5742
/**
 * Emit fragment position/coordinate code.
 *
 * Reconciles the shader's requested WPOS convention (origin and pixel
 * center) with the conventions the pipe driver supports, then delegates
 * any needed bias/inversion to emit_wpos_adjustment().
 */
static void
emit_wpos(struct st_context *st,
          struct st_translate *t,
          const struct gl_program *program,
          struct ureg_program *ureg,
          int wpos_transform_const)
{
   struct pipe_screen *pscreen = st->pipe->screen;
   GLfloat adjX = 0.0f;
   GLfloat adjY[2] = { 0.0f, 0.0f };
   boolean invert = FALSE;

   /* Query the pixel center conventions supported by the pipe driver and set
    * adjX, adjY to help out if it cannot handle the requested one internally.
    *
    * The bias of the y-coordinate depends on whether y-inversion takes place
    * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are
    * drawing to an FBO (causes additional inversion), and whether the pipe
    * driver origin and the requested origin differ (the latter condition is
    * stored in the 'invert' variable).
    *
    * For height = 100 (i = integer, h = half-integer, l = lower, u = upper):
    *
    * center shift only:
    * i -> h: +0.5
    * h -> i: -0.5
    *
    * inversion only:
    * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99
    * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5
    * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0
    * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5
    *
    * inversion and center shift:
    * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5
    * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99
    * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5
    * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0
    */
   if (program->OriginUpperLeft) {
      /* Fragment shader wants origin in upper-left */
      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
         /* the driver supports upper-left origin */
      }
      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
         /* the driver supports lower-left origin, need to invert Y */
         ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN,
                       TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
         invert = TRUE;
      }
      else
         assert(0);
   }
   else {
      /* Fragment shader wants origin in lower-left */
      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
         /* the driver supports lower-left origin */
         ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN,
                       TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
         /* the driver supports upper-left origin, need to invert Y */
         invert = TRUE;
      else
         assert(0);
   }

   if (program->PixelCenterInteger) {
      /* Fragment shader wants pixel center integer */
      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
         /* the driver supports pixel center integer */
         adjY[1] = 1.0f;
         ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
                       TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
      }
      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
         /* the driver supports pixel center half integer, need to bias X,Y */
         adjX = -0.5f;
         adjY[0] = -0.5f;
         adjY[1] = 0.5f;
      }
      else
         assert(0);
   }
   else {
      /* Fragment shader wants pixel center half integer */
      if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
         /* the driver supports pixel center half integer */
      }
      else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
         /* the driver supports pixel center integer, need to bias X,Y */
         adjX = adjY[0] = adjY[1] = 0.5f;
         ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
                       TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
      }
      else
         assert(0);
   }

   /* we invert after adjustment so that we avoid the MOV to temporary,
    * and reuse the adjustment ADD instead */
   emit_wpos_adjustment(st->ctx, t, wpos_transform_const, invert, adjX, adjY);
}
5848
5849 /**
5850 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back.
5851 * TGSI uses +1 for front, -1 for back.
5852 * This function converts the TGSI value to the GL value. Simply clamping/
5853 * saturating the value to [0,1] does the job.
5854 */
5855 static void
5856 emit_face_var(struct gl_context *ctx, struct st_translate *t)
5857 {
5858 struct ureg_program *ureg = t->ureg;
5859 struct ureg_dst face_temp = ureg_DECL_temporary(ureg);
5860 struct ureg_src face_input = t->inputs[t->inputMapping[VARYING_SLOT_FACE]];
5861
5862 if (ctx->Const.NativeIntegers) {
5863 ureg_FSGE(ureg, face_temp, face_input, ureg_imm1f(ureg, 0));
5864 }
5865 else {
5866 /* MOV_SAT face_temp, input[face] */
5867 ureg_MOV(ureg, ureg_saturate(face_temp), face_input);
5868 }
5869
5870 /* Use face_temp as face input from here on: */
5871 t->inputs[t->inputMapping[VARYING_SLOT_FACE]] = ureg_src(face_temp);
5872 }
5873
5874 static void
5875 emit_compute_block_size(const struct gl_program *prog,
5876 struct ureg_program *ureg) {
5877 ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH,
5878 prog->info.cs.local_size[0]);
5879 ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT,
5880 prog->info.cs.local_size[1]);
5881 ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH,
5882 prog->info.cs.local_size[2]);
5883 }
5884
/* Comparator ordering in/out declarations by their mapped TGSI slot. */
struct sort_inout_decls {
   bool operator()(const struct inout_decl &a, const struct inout_decl &b) const {
      return mapping[a.mesa_index] < mapping[b.mesa_index];
   }

   /* Mesa index -> TGSI slot mapping used as the sort key. */
   const GLuint *mapping;
};
5892
5893 /* Sort the given array of decls by the corresponding slot (TGSI file index).
5894 *
5895 * This is for the benefit of older drivers which are broken when the
5896 * declarations aren't sorted in this way.
5897 */
5898 static void
5899 sort_inout_decls_by_slot(struct inout_decl *decls,
5900 unsigned count,
5901 const GLuint mapping[])
5902 {
5903 sort_inout_decls sorter;
5904 sorter.mapping = mapping;
5905 std::sort(decls, decls + count, sorter);
5906 }
5907
5908 static unsigned
5909 st_translate_interp(enum glsl_interp_mode glsl_qual, GLuint varying)
5910 {
5911 switch (glsl_qual) {
5912 case INTERP_MODE_NONE:
5913 if (varying == VARYING_SLOT_COL0 || varying == VARYING_SLOT_COL1)
5914 return TGSI_INTERPOLATE_COLOR;
5915 return TGSI_INTERPOLATE_PERSPECTIVE;
5916 case INTERP_MODE_SMOOTH:
5917 return TGSI_INTERPOLATE_PERSPECTIVE;
5918 case INTERP_MODE_FLAT:
5919 return TGSI_INTERPOLATE_CONSTANT;
5920 case INTERP_MODE_NOPERSPECTIVE:
5921 return TGSI_INTERPOLATE_LINEAR;
5922 default:
5923 assert(0 && "unexpected interp mode in st_translate_interp()");
5924 return TGSI_INTERPOLATE_PERSPECTIVE;
5925 }
5926 }
5927
5928 /**
5929 * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
5930 * \param program the program to translate
5931 * \param numInputs number of input registers used
5932 * \param inputMapping maps Mesa fragment program inputs to TGSI generic
5933 * input indexes
5934 * \param inputSemanticName the TGSI_SEMANTIC flag for each input
5935 * \param inputSemanticIndex the semantic index (ex: which texcoord) for
5936 * each input
5937 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
5938 * \param numOutputs number of output registers used
5939 * \param outputMapping maps Mesa fragment program outputs to TGSI
5940 * generic outputs
5941 * \param outputSemanticName the TGSI_SEMANTIC flag for each output
5942 * \param outputSemanticIndex the semantic index (ex: which texcoord) for
5943 * each output
5944 *
5945 * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
5946 */
5947 extern "C" enum pipe_error
5948 st_translate_program(
5949 struct gl_context *ctx,
5950 uint procType,
5951 struct ureg_program *ureg,
5952 glsl_to_tgsi_visitor *program,
5953 const struct gl_program *proginfo,
5954 GLuint numInputs,
5955 const GLuint inputMapping[],
5956 const GLuint inputSlotToAttr[],
5957 const ubyte inputSemanticName[],
5958 const ubyte inputSemanticIndex[],
5959 const GLuint interpMode[],
5960 GLuint numOutputs,
5961 const GLuint outputMapping[],
5962 const GLuint outputSlotToAttr[],
5963 const ubyte outputSemanticName[],
5964 const ubyte outputSemanticIndex[])
5965 {
5966 struct st_translate *t;
5967 unsigned i;
5968 struct gl_program_constants *frag_const =
5969 &ctx->Const.Program[MESA_SHADER_FRAGMENT];
5970 enum pipe_error ret = PIPE_OK;
5971
5972 assert(numInputs <= ARRAY_SIZE(t->inputs));
5973 assert(numOutputs <= ARRAY_SIZE(t->outputs));
5974
5975 t = CALLOC_STRUCT(st_translate);
5976 if (!t) {
5977 ret = PIPE_ERROR_OUT_OF_MEMORY;
5978 goto out;
5979 }
5980
5981 t->procType = procType;
5982 t->inputMapping = inputMapping;
5983 t->outputMapping = outputMapping;
5984 t->ureg = ureg;
5985 t->num_temp_arrays = program->next_array;
5986 if (t->num_temp_arrays)
5987 t->arrays = (struct ureg_dst*)
5988 calloc(t->num_temp_arrays, sizeof(t->arrays[0]));
5989
5990 /*
5991 * Declare input attributes.
5992 */
5993 switch (procType) {
5994 case PIPE_SHADER_FRAGMENT:
5995 case PIPE_SHADER_GEOMETRY:
5996 case PIPE_SHADER_TESS_EVAL:
5997 case PIPE_SHADER_TESS_CTRL:
5998 sort_inout_decls_by_slot(program->inputs, program->num_inputs, inputMapping);
5999
6000 for (i = 0; i < program->num_inputs; ++i) {
6001 struct inout_decl *decl = &program->inputs[i];
6002 unsigned slot = inputMapping[decl->mesa_index];
6003 struct ureg_src src;
6004 ubyte tgsi_usage_mask = decl->usage_mask;
6005
6006 if (glsl_base_type_is_64bit(decl->base_type)) {
6007 if (tgsi_usage_mask == 1)
6008 tgsi_usage_mask = TGSI_WRITEMASK_XY;
6009 else if (tgsi_usage_mask == 2)
6010 tgsi_usage_mask = TGSI_WRITEMASK_ZW;
6011 else
6012 tgsi_usage_mask = TGSI_WRITEMASK_XYZW;
6013 }
6014
6015 unsigned interp_mode = 0;
6016 unsigned interp_location = 0;
6017 if (procType == PIPE_SHADER_FRAGMENT) {
6018 assert(interpMode);
6019 interp_mode = interpMode[slot] != TGSI_INTERPOLATE_COUNT ?
6020 interpMode[slot] :
6021 st_translate_interp(decl->interp, inputSlotToAttr[slot]);
6022
6023 interp_location = decl->interp_loc;
6024 }
6025
6026 src = ureg_DECL_fs_input_cyl_centroid_layout(ureg,
6027 inputSemanticName[slot], inputSemanticIndex[slot],
6028 interp_mode, 0, interp_location, slot, tgsi_usage_mask,
6029 decl->array_id, decl->size);
6030
6031 for (unsigned j = 0; j < decl->size; ++j) {
6032 if (t->inputs[slot + j].File != TGSI_FILE_INPUT) {
6033 /* The ArrayID is set up in dst_register */
6034 t->inputs[slot + j] = src;
6035 t->inputs[slot + j].ArrayID = 0;
6036 t->inputs[slot + j].Index += j;
6037 }
6038 }
6039 }
6040 break;
6041 case PIPE_SHADER_VERTEX:
6042 for (i = 0; i < numInputs; i++) {
6043 t->inputs[i] = ureg_DECL_vs_input(ureg, i);
6044 }
6045 break;
6046 case PIPE_SHADER_COMPUTE:
6047 break;
6048 default:
6049 assert(0);
6050 }
6051
6052 /*
6053 * Declare output attributes.
6054 */
6055 switch (procType) {
6056 case PIPE_SHADER_FRAGMENT:
6057 case PIPE_SHADER_COMPUTE:
6058 break;
6059 case PIPE_SHADER_GEOMETRY:
6060 case PIPE_SHADER_TESS_EVAL:
6061 case PIPE_SHADER_TESS_CTRL:
6062 case PIPE_SHADER_VERTEX:
6063 sort_inout_decls_by_slot(program->outputs, program->num_outputs, outputMapping);
6064
6065 for (i = 0; i < program->num_outputs; ++i) {
6066 struct inout_decl *decl = &program->outputs[i];
6067 unsigned slot = outputMapping[decl->mesa_index];
6068 struct ureg_dst dst;
6069 ubyte tgsi_usage_mask = decl->usage_mask;
6070
6071 if (glsl_base_type_is_64bit(decl->base_type)) {
6072 if (tgsi_usage_mask == 1)
6073 tgsi_usage_mask = TGSI_WRITEMASK_XY;
6074 else if (tgsi_usage_mask == 2)
6075 tgsi_usage_mask = TGSI_WRITEMASK_ZW;
6076 else
6077 tgsi_usage_mask = TGSI_WRITEMASK_XYZW;
6078 }
6079
6080 dst = ureg_DECL_output_layout(ureg,
6081 outputSemanticName[slot], outputSemanticIndex[slot],
6082 slot, tgsi_usage_mask, decl->array_id, decl->size);
6083
6084 for (unsigned j = 0; j < decl->size; ++j) {
6085 if (t->outputs[slot + j].File != TGSI_FILE_OUTPUT) {
6086 /* The ArrayID is set up in dst_register */
6087 t->outputs[slot + j] = dst;
6088 t->outputs[slot + j].ArrayID = 0;
6089 t->outputs[slot + j].Index += j;
6090 }
6091 }
6092 }
6093 break;
6094 default:
6095 assert(0);
6096 }
6097
6098 if (procType == PIPE_SHADER_FRAGMENT) {
6099 if (program->shader->info.EarlyFragmentTests)
6100 ureg_property(ureg, TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL, 1);
6101
6102 if (proginfo->info.inputs_read & VARYING_BIT_POS) {
6103 /* Must do this after setting up t->inputs. */
6104 emit_wpos(st_context(ctx), t, proginfo, ureg,
6105 program->wpos_transform_const);
6106 }
6107
6108 if (proginfo->info.inputs_read & VARYING_BIT_FACE)
6109 emit_face_var(ctx, t);
6110
6111 for (i = 0; i < numOutputs; i++) {
6112 switch (outputSemanticName[i]) {
6113 case TGSI_SEMANTIC_POSITION:
6114 t->outputs[i] = ureg_DECL_output(ureg,
6115 TGSI_SEMANTIC_POSITION, /* Z/Depth */
6116 outputSemanticIndex[i]);
6117 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z);
6118 break;
6119 case TGSI_SEMANTIC_STENCIL:
6120 t->outputs[i] = ureg_DECL_output(ureg,
6121 TGSI_SEMANTIC_STENCIL, /* Stencil */
6122 outputSemanticIndex[i]);
6123 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y);
6124 break;
6125 case TGSI_SEMANTIC_COLOR:
6126 t->outputs[i] = ureg_DECL_output(ureg,
6127 TGSI_SEMANTIC_COLOR,
6128 outputSemanticIndex[i]);
6129 break;
6130 case TGSI_SEMANTIC_SAMPLEMASK:
6131 t->outputs[i] = ureg_DECL_output(ureg,
6132 TGSI_SEMANTIC_SAMPLEMASK,
6133 outputSemanticIndex[i]);
6134 /* TODO: If we ever support more than 32 samples, this will have
6135 * to become an array.
6136 */
6137 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X);
6138 break;
6139 default:
6140 assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR");
6141 ret = PIPE_ERROR_BAD_INPUT;
6142 goto out;
6143 }
6144 }
6145 }
6146 else if (procType == PIPE_SHADER_VERTEX) {
6147 for (i = 0; i < numOutputs; i++) {
6148 if (outputSemanticName[i] == TGSI_SEMANTIC_FOG) {
6149 /* force register to contain a fog coordinate in the form (F, 0, 0, 1). */
6150 ureg_MOV(ureg,
6151 ureg_writemask(t->outputs[i], TGSI_WRITEMASK_YZW),
6152 ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
6153 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X);
6154 }
6155 }
6156 }
6157
6158 if (procType == PIPE_SHADER_COMPUTE) {
6159 emit_compute_block_size(proginfo, ureg);
6160 }
6161
6162 /* Declare address register.
6163 */
6164 if (program->num_address_regs > 0) {
6165 assert(program->num_address_regs <= 3);
6166 for (int i = 0; i < program->num_address_regs; i++)
6167 t->address[i] = ureg_DECL_address(ureg);
6168 }
6169
6170 /* Declare misc input registers
6171 */
6172 {
6173 GLbitfield sysInputs = proginfo->SystemValuesRead;
6174
6175 for (i = 0; sysInputs; i++) {
6176 if (sysInputs & (1 << i)) {
6177 unsigned semName = _mesa_sysval_to_semantic(i);
6178
6179 t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0);
6180
6181 if (semName == TGSI_SEMANTIC_INSTANCEID ||
6182 semName == TGSI_SEMANTIC_VERTEXID) {
6183 /* From Gallium perspective, these system values are always
6184 * integer, and require native integer support. However, if
6185 * native integer is supported on the vertex stage but not the
6186 * pixel stage (e.g, i915g + draw), Mesa will generate IR that
6187 * assumes these system values are floats. To resolve the
6188 * inconsistency, we insert a U2F.
6189 */
6190 struct st_context *st = st_context(ctx);
6191 struct pipe_screen *pscreen = st->pipe->screen;
6192 assert(procType == PIPE_SHADER_VERTEX);
6193 assert(pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_INTEGERS));
6194 (void) pscreen;
6195 if (!ctx->Const.NativeIntegers) {
6196 struct ureg_dst temp = ureg_DECL_local_temporary(t->ureg);
6197 ureg_U2F( t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X), t->systemValues[i]);
6198 t->systemValues[i] = ureg_scalar(ureg_src(temp), 0);
6199 }
6200 }
6201
6202 if (procType == PIPE_SHADER_FRAGMENT &&
6203 semName == TGSI_SEMANTIC_POSITION)
6204 emit_wpos(st_context(ctx), t, proginfo, ureg,
6205 program->wpos_transform_const);
6206
6207 sysInputs &= ~(1 << i);
6208 }
6209 }
6210 }
6211
6212 t->array_sizes = program->array_sizes;
6213 t->input_decls = program->inputs;
6214 t->num_input_decls = program->num_inputs;
6215 t->output_decls = program->outputs;
6216 t->num_output_decls = program->num_outputs;
6217
6218 /* Emit constants and uniforms. TGSI uses a single index space for these,
6219 * so we put all the translated regs in t->constants.
6220 */
6221 if (proginfo->Parameters) {
6222 t->constants = (struct ureg_src *)
6223 calloc(proginfo->Parameters->NumParameters, sizeof(t->constants[0]));
6224 if (t->constants == NULL) {
6225 ret = PIPE_ERROR_OUT_OF_MEMORY;
6226 goto out;
6227 }
6228 t->num_constants = proginfo->Parameters->NumParameters;
6229
6230 for (i = 0; i < proginfo->Parameters->NumParameters; i++) {
6231 switch (proginfo->Parameters->Parameters[i].Type) {
6232 case PROGRAM_STATE_VAR:
6233 case PROGRAM_UNIFORM:
6234 t->constants[i] = ureg_DECL_constant(ureg, i);
6235 break;
6236
6237 /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect
6238 * addressing of the const buffer.
6239 * FIXME: Be smarter and recognize param arrays:
6240 * indirect addressing is only valid within the referenced
6241 * array.
6242 */
6243 case PROGRAM_CONSTANT:
6244 if (program->indirect_addr_consts)
6245 t->constants[i] = ureg_DECL_constant(ureg, i);
6246 else
6247 t->constants[i] = emit_immediate(t,
6248 proginfo->Parameters->ParameterValues[i],
6249 proginfo->Parameters->Parameters[i].DataType,
6250 4);
6251 break;
6252 default:
6253 break;
6254 }
6255 }
6256 }
6257
6258 if (program->shader) {
6259 unsigned num_ubos = program->shader->NumUniformBlocks;
6260
6261 for (i = 0; i < num_ubos; i++) {
6262 unsigned size = program->shader->UniformBlocks[i]->UniformBufferSize;
6263 unsigned num_const_vecs = (size + 15) / 16;
6264 unsigned first, last;
6265 assert(num_const_vecs > 0);
6266 first = 0;
6267 last = num_const_vecs > 0 ? num_const_vecs - 1 : 0;
6268 ureg_DECL_constant2D(t->ureg, first, last, i + 1);
6269 }
6270 }
6271
6272 /* Emit immediate values.
6273 */
6274 t->immediates = (struct ureg_src *)
6275 calloc(program->num_immediates, sizeof(struct ureg_src));
6276 if (t->immediates == NULL) {
6277 ret = PIPE_ERROR_OUT_OF_MEMORY;
6278 goto out;
6279 }
6280 t->num_immediates = program->num_immediates;
6281
6282 i = 0;
6283 foreach_in_list(immediate_storage, imm, &program->immediates) {
6284 assert(i < program->num_immediates);
6285 t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size32);
6286 }
6287 assert(i == program->num_immediates);
6288
6289 /* texture samplers */
6290 for (i = 0; i < frag_const->MaxTextureImageUnits; i++) {
6291 if (program->samplers_used & (1u << i)) {
6292 unsigned type;
6293
6294 t->samplers[i] = ureg_DECL_sampler(ureg, i);
6295
6296 switch (program->sampler_types[i]) {
6297 case GLSL_TYPE_INT:
6298 type = TGSI_RETURN_TYPE_SINT;
6299 break;
6300 case GLSL_TYPE_UINT:
6301 type = TGSI_RETURN_TYPE_UINT;
6302 break;
6303 case GLSL_TYPE_FLOAT:
6304 type = TGSI_RETURN_TYPE_FLOAT;
6305 break;
6306 default:
6307 unreachable("not reached");
6308 }
6309
6310 ureg_DECL_sampler_view( ureg, i, program->sampler_targets[i],
6311 type, type, type, type );
6312 }
6313 }
6314
6315 for (i = 0; i < frag_const->MaxAtomicBuffers; i++) {
6316 if (program->buffers_used & (1 << i)) {
6317 t->buffers[i] = ureg_DECL_buffer(ureg, i, true);
6318 }
6319 }
6320
6321 for (; i < frag_const->MaxAtomicBuffers + frag_const->MaxShaderStorageBlocks;
6322 i++) {
6323 if (program->buffers_used & (1 << i)) {
6324 t->buffers[i] = ureg_DECL_buffer(ureg, i, false);
6325 }
6326 }
6327
6328 if (program->use_shared_memory)
6329 t->shared_memory = ureg_DECL_memory(ureg, TGSI_MEMORY_TYPE_SHARED);
6330
6331 for (i = 0; i < program->shader->NumImages; i++) {
6332 if (program->images_used & (1 << i)) {
6333 t->images[i] = ureg_DECL_image(ureg, i,
6334 program->image_targets[i],
6335 program->image_formats[i],
6336 true, false);
6337 }
6338 }
6339
6340 /* Emit each instruction in turn:
6341 */
6342 foreach_in_list(glsl_to_tgsi_instruction, inst, &program->instructions)
6343 compile_tgsi_instruction(t, inst);
6344
6345 /* Set the next shader stage hint for VS and TES. */
6346 switch (procType) {
6347 case PIPE_SHADER_VERTEX:
6348 case PIPE_SHADER_TESS_EVAL:
6349 if (program->shader_program->SeparateShader)
6350 break;
6351
6352 for (i = program->shader->Stage+1; i <= MESA_SHADER_FRAGMENT; i++) {
6353 if (program->shader_program->_LinkedShaders[i]) {
6354 unsigned next;
6355
6356 switch (i) {
6357 case MESA_SHADER_TESS_CTRL:
6358 next = PIPE_SHADER_TESS_CTRL;
6359 break;
6360 case MESA_SHADER_TESS_EVAL:
6361 next = PIPE_SHADER_TESS_EVAL;
6362 break;
6363 case MESA_SHADER_GEOMETRY:
6364 next = PIPE_SHADER_GEOMETRY;
6365 break;
6366 case MESA_SHADER_FRAGMENT:
6367 next = PIPE_SHADER_FRAGMENT;
6368 break;
6369 default:
6370 assert(0);
6371 continue;
6372 }
6373
6374 ureg_set_next_shader_processor(ureg, next);
6375 break;
6376 }
6377 }
6378 break;
6379 }
6380
6381 out:
6382 if (t) {
6383 free(t->arrays);
6384 free(t->temps);
6385 free(t->constants);
6386 t->num_constants = 0;
6387 free(t->immediates);
6388 t->num_immediates = 0;
6389 FREE(t);
6390 }
6391
6392 return ret;
6393 }
6394 /* ----------------------------- End TGSI code ------------------------------ */
6395
6396
/**
 * Convert a shader's GLSL IR into a Mesa gl_program, although without
 * generating Mesa IR.
 *
 * Builds a glsl_to_tgsi_visitor over the linked shader's IR, runs the
 * glsl_to_tgsi optimization passes on the resulting instruction list,
 * and stashes the visitor on the stage-specific st_*_program wrapper so
 * the final TGSI translation can happen later.
 *
 * \return the new gl_program, or NULL on allocation or link failure.
 */
static struct gl_program *
get_mesa_program_tgsi(struct gl_context *ctx,
                      struct gl_shader_program *shader_program,
                      struct gl_linked_shader *shader)
{
   glsl_to_tgsi_visitor* v;
   struct gl_program *prog;
   GLenum target = _mesa_shader_stage_to_program(shader->Stage);
   struct gl_shader_compiler_options *options =
      &ctx->Const.ShaderCompilerOptions[shader->Stage];
   struct pipe_screen *pscreen = ctx->st->pipe->screen;
   enum pipe_shader_type ptarget = st_shader_stage_to_ptarget(shader->Stage);

   validate_ir_tree(shader->ir);

   prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
   if (!prog)
      return NULL;

   _mesa_reference_program(ctx, &shader->Program, prog);

   prog->Parameters = _mesa_new_parameter_list();
   v = new glsl_to_tgsi_visitor();
   v->ctx = ctx;
   v->prog = prog;
   v->shader_program = shader_program;
   v->shader = shader;
   v->options = options;
   v->glsl_version = ctx->Const.GLSLVersion;
   v->native_integers = ctx->Const.NativeIntegers;

   /* Ask the driver which TGSI opcodes it supports natively, so the
    * visitor can emit them directly instead of lowered sequences.
    */
   v->have_sqrt = pscreen->get_shader_param(pscreen, ptarget,
                                            PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED);
   v->have_fma = pscreen->get_shader_param(pscreen, ptarget,
                                           PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED);

   _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
                                               prog->Parameters);

   /* Remove reads from output registers. */
   lower_output_reads(shader->Stage, shader->ir);

   /* Emit intermediate IR for main(). */
   visit_exec_list(shader->ir, v);

#if 0
   /* Print out some information (for debugging purposes) used by the
    * optimization passes. */
   {
      int i;
      int *first_writes = rzalloc_array(v->mem_ctx, int, v->next_temp);
      int *first_reads = rzalloc_array(v->mem_ctx, int, v->next_temp);
      int *last_writes = rzalloc_array(v->mem_ctx, int, v->next_temp);
      int *last_reads = rzalloc_array(v->mem_ctx, int, v->next_temp);

      for (i = 0; i < v->next_temp; i++) {
         first_writes[i] = -1;
         first_reads[i] = -1;
         last_writes[i] = -1;
         last_reads[i] = -1;
      }
      v->get_first_temp_read(first_reads);
      v->get_last_temp_read_first_temp_write(last_reads, first_writes);
      v->get_last_temp_write(last_writes);
      for (i = 0; i < v->next_temp; i++)
         printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, first_reads[i],
                first_writes[i],
                last_reads[i],
                last_writes[i]);
      ralloc_free(first_writes);
      ralloc_free(first_reads);
      ralloc_free(last_writes);
      ralloc_free(last_reads);
   }
#endif

   /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
   v->simplify_cmp();

   /* NOTE(review): copy propagation is skipped for tessellation stages,
    * presumably because of their multi-invocation output semantics --
    * confirm before changing.
    */
   if (shader->Stage != MESA_SHADER_TESS_CTRL &&
       shader->Stage != MESA_SHADER_TESS_EVAL)
      v->copy_propagate();

   /* Iterate dead-code elimination until it stops making progress. */
   while (v->eliminate_dead_code());

   v->merge_two_dsts();
   v->merge_registers();
   v->renumber_registers();

   /* Write the END instruction. */
   v->emit_asm(NULL, TGSI_OPCODE_END);

   if (ctx->_Shader->Flags & GLSL_DUMP) {
      _mesa_log("\n");
      _mesa_log("GLSL IR for linked %s program %d:\n",
                _mesa_shader_stage_to_string(shader->Stage),
                shader_program->Name);
      _mesa_print_ir(_mesa_get_log_file(), shader->ir, NULL);
      _mesa_log("\n\n");
   }

   /* No classic Mesa IR is generated on this path. */
   prog->Instructions = NULL;
   prog->NumInstructions = 0;

   do_set_program_inouts(shader->ir, prog, shader->Stage);
   _mesa_copy_linked_program_data(shader_program, shader);
   shrink_array_declarations(v->inputs, v->num_inputs,
                             &prog->info.inputs_read,
                             prog->info.double_inputs_read,
                             &prog->PatchInputsRead);
   shrink_array_declarations(v->outputs, v->num_outputs,
                             &prog->OutputsWritten, 0ULL, &prog->PatchOutputsWritten);
   count_resources(v, prog);

   /* The GLSL IR won't be needed anymore. */
   ralloc_free(shader->ir);
   shader->ir = NULL;

   /* This must be done before the uniform storage is associated. */
   if (shader->Stage == MESA_SHADER_FRAGMENT &&
       (prog->info.inputs_read & VARYING_BIT_POS ||
        prog->SystemValuesRead & (1 << SYSTEM_VALUE_FRAG_COORD))) {
      static const gl_state_index wposTransformState[STATE_LENGTH] = {
         STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM
      };

      v->wpos_transform_const = _mesa_add_state_reference(prog->Parameters,
                                                          wposTransformState);
   }

   /* Avoid reallocation of the program parameter list, because the uniform
    * storage is only associated with the original parameter list.
    * This should be enough for Bitmap and DrawPixels constants.
    */
   _mesa_reserve_parameter_storage(prog->Parameters, 8);

   /* This has to be done last.  Any operation that can cause
    * prog->ParameterValues to get reallocated (e.g., anything that adds a
    * program constant) has to happen before creating this linkage.
    */
   _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters);
   if (!shader_program->LinkStatus) {
      free_glsl_to_tgsi_visitor(v);
      return NULL;
   }

   struct st_vertex_program *stvp;
   struct st_fragment_program *stfp;
   struct st_geometry_program *stgp;
   struct st_tessctrl_program *sttcp;
   struct st_tesseval_program *sttep;
   struct st_compute_program *stcp;

   /* Stash the visitor on the stage-specific program wrapper for the
    * later TGSI translation step.
    */
   switch (shader->Stage) {
   case MESA_SHADER_VERTEX:
      stvp = (struct st_vertex_program *)prog;
      stvp->glsl_to_tgsi = v;
      break;
   case MESA_SHADER_FRAGMENT:
      stfp = (struct st_fragment_program *)prog;
      stfp->glsl_to_tgsi = v;
      break;
   case MESA_SHADER_GEOMETRY:
      stgp = (struct st_geometry_program *)prog;
      stgp->glsl_to_tgsi = v;
      break;
   case MESA_SHADER_TESS_CTRL:
      sttcp = (struct st_tessctrl_program *)prog;
      sttcp->glsl_to_tgsi = v;
      break;
   case MESA_SHADER_TESS_EVAL:
      sttep = (struct st_tesseval_program *)prog;
      sttep->glsl_to_tgsi = v;
      break;
   case MESA_SHADER_COMPUTE:
      stcp = (struct st_compute_program *)prog;
      stcp->glsl_to_tgsi = v;
      break;
   default:
      assert(!"should not be reached");
      return NULL;
   }

   return prog;
}
6586
6587 static void
6588 set_affected_state_flags(uint64_t *states,
6589 struct gl_program *prog,
6590 struct gl_linked_shader *shader,
6591 uint64_t new_constants,
6592 uint64_t new_sampler_views,
6593 uint64_t new_samplers,
6594 uint64_t new_images,
6595 uint64_t new_ubos,
6596 uint64_t new_ssbos,
6597 uint64_t new_atomics)
6598 {
6599 if (prog->Parameters->NumParameters)
6600 *states |= new_constants;
6601
6602 if (shader->num_samplers)
6603 *states |= new_sampler_views | new_samplers;
6604
6605 if (shader->NumImages)
6606 *states |= new_images;
6607
6608 if (shader->NumUniformBlocks)
6609 *states |= new_ubos;
6610
6611 if (shader->NumShaderStorageBlocks)
6612 *states |= new_ssbos;
6613
6614 if (shader->NumAtomicBuffers)
6615 *states |= new_atomics;
6616 }
6617
6618 static struct gl_program *
6619 get_mesa_program(struct gl_context *ctx,
6620 struct gl_shader_program *shader_program,
6621 struct gl_linked_shader *shader)
6622 {
6623 struct pipe_screen *pscreen = ctx->st->pipe->screen;
6624 enum pipe_shader_type ptarget = st_shader_stage_to_ptarget(shader->Stage);
6625 enum pipe_shader_ir preferred_ir = (enum pipe_shader_ir)
6626 pscreen->get_shader_param(pscreen, ptarget, PIPE_SHADER_CAP_PREFERRED_IR);
6627 struct gl_program *prog = NULL;
6628
6629 if (preferred_ir == PIPE_SHADER_IR_NIR) {
6630 /* TODO only for GLSL VS/FS for now: */
6631 switch (shader->Stage) {
6632 case MESA_SHADER_VERTEX:
6633 case MESA_SHADER_FRAGMENT:
6634 prog = st_nir_get_mesa_program(ctx, shader_program, shader);
6635 default:
6636 break;
6637 }
6638 } else {
6639 prog = get_mesa_program_tgsi(ctx, shader_program, shader);
6640 }
6641
6642 if (prog) {
6643 uint64_t *states;
6644
6645 /* This determines which states will be updated when the shader is
6646 * bound.
6647 */
6648 switch (shader->Stage) {
6649 case MESA_SHADER_VERTEX:
6650 states = &((struct st_vertex_program*)prog)->affected_states;
6651
6652 *states = ST_NEW_VS_STATE |
6653 ST_NEW_RASTERIZER |
6654 ST_NEW_VERTEX_ARRAYS;
6655
6656 set_affected_state_flags(states, prog, shader,
6657 ST_NEW_VS_CONSTANTS,
6658 ST_NEW_VS_SAMPLER_VIEWS,
6659 ST_NEW_RENDER_SAMPLERS,
6660 ST_NEW_VS_IMAGES,
6661 ST_NEW_VS_UBOS,
6662 ST_NEW_VS_SSBOS,
6663 ST_NEW_VS_ATOMICS);
6664 break;
6665
6666 case MESA_SHADER_TESS_CTRL:
6667 states = &((struct st_tessctrl_program*)prog)->affected_states;
6668
6669 *states = ST_NEW_TCS_STATE;
6670
6671 set_affected_state_flags(states, prog, shader,
6672 ST_NEW_TCS_CONSTANTS,
6673 ST_NEW_TCS_SAMPLER_VIEWS,
6674 ST_NEW_RENDER_SAMPLERS,
6675 ST_NEW_TCS_IMAGES,
6676 ST_NEW_TCS_UBOS,
6677 ST_NEW_TCS_SSBOS,
6678 ST_NEW_TCS_ATOMICS);
6679 break;
6680
6681 case MESA_SHADER_TESS_EVAL:
6682 states = &((struct st_tesseval_program*)prog)->affected_states;
6683
6684 *states = ST_NEW_TES_STATE |
6685 ST_NEW_RASTERIZER;
6686
6687 set_affected_state_flags(states, prog, shader,
6688 ST_NEW_TES_CONSTANTS,
6689 ST_NEW_TES_SAMPLER_VIEWS,
6690 ST_NEW_RENDER_SAMPLERS,
6691 ST_NEW_TES_IMAGES,
6692 ST_NEW_TES_UBOS,
6693 ST_NEW_TES_SSBOS,
6694 ST_NEW_TES_ATOMICS);
6695 break;
6696
6697 case MESA_SHADER_GEOMETRY:
6698 states = &((struct st_geometry_program*)prog)->affected_states;
6699
6700 *states = ST_NEW_GS_STATE |
6701 ST_NEW_RASTERIZER;
6702
6703 set_affected_state_flags(states, prog, shader,
6704 ST_NEW_GS_CONSTANTS,
6705 ST_NEW_GS_SAMPLER_VIEWS,
6706 ST_NEW_RENDER_SAMPLERS,
6707 ST_NEW_GS_IMAGES,
6708 ST_NEW_GS_UBOS,
6709 ST_NEW_GS_SSBOS,
6710 ST_NEW_GS_ATOMICS);
6711 break;
6712
6713 case MESA_SHADER_FRAGMENT:
6714 states = &((struct st_fragment_program*)prog)->affected_states;
6715
6716 /* gl_FragCoord and glDrawPixels always use constants. */
6717 *states = ST_NEW_FS_STATE |
6718 ST_NEW_SAMPLE_SHADING |
6719 ST_NEW_FS_CONSTANTS;
6720
6721 set_affected_state_flags(states, prog, shader,
6722 ST_NEW_FS_CONSTANTS,
6723 ST_NEW_FS_SAMPLER_VIEWS,
6724 ST_NEW_RENDER_SAMPLERS,
6725 ST_NEW_FS_IMAGES,
6726 ST_NEW_FS_UBOS,
6727 ST_NEW_FS_SSBOS,
6728 ST_NEW_FS_ATOMICS);
6729 break;
6730
6731 case MESA_SHADER_COMPUTE:
6732 states = &((struct st_compute_program*)prog)->affected_states;
6733
6734 *states = ST_NEW_CS_STATE;
6735
6736 set_affected_state_flags(states, prog, shader,
6737 ST_NEW_CS_CONSTANTS,
6738 ST_NEW_CS_SAMPLER_VIEWS,
6739 ST_NEW_CS_SAMPLERS,
6740 ST_NEW_CS_IMAGES,
6741 ST_NEW_CS_UBOS,
6742 ST_NEW_CS_SSBOS,
6743 ST_NEW_CS_ATOMICS);
6744 break;
6745
6746 default:
6747 unreachable("unhandled shader stage");
6748 }
6749 }
6750
6751 return prog;
6752 }
6753
6754
6755 extern "C" {
6756
/**
 * Link a shader.
 * Called via ctx->Driver.LinkShader()
 * This actually involves converting GLSL IR into an intermediate TGSI-like IR
 * with code lowering and other optimizations.
 *
 * For each linked stage: run the driver-capability-dependent GLSL IR
 * lowering passes, iterate the common optimizations to a fixed point,
 * then translate the stage via get_mesa_program() and notify the driver.
 *
 * \return GL_TRUE on success, GL_FALSE if the driver rejects a program.
 */
GLboolean
st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
{
   struct pipe_screen *pscreen = ctx->st->pipe->screen;
   assert(prog->LinkStatus);

   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      if (prog->_LinkedShaders[i] == NULL)
         continue;

      bool progress;
      exec_list *ir = prog->_LinkedShaders[i]->ir;
      gl_shader_stage stage = prog->_LinkedShaders[i]->Stage;
      const struct gl_shader_compiler_options *options =
         &ctx->Const.ShaderCompilerOptions[stage];
      enum pipe_shader_type ptarget = st_shader_stage_to_ptarget(stage);
      /* Which double-precision opcodes can the driver handle natively? */
      bool have_dround = pscreen->get_shader_param(pscreen, ptarget,
                                                   PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED);
      bool have_dfrexp = pscreen->get_shader_param(pscreen, ptarget,
                                                   PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED);

      /* If there are forms of indirect addressing that the driver
       * cannot handle, perform the lowering pass.
       */
      if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput ||
          options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) {
         lower_variable_index_to_cond_assign(prog->_LinkedShaders[i]->Stage, ir,
                                             options->EmitNoIndirectInput,
                                             options->EmitNoIndirectOutput,
                                             options->EmitNoIndirectTemp,
                                             options->EmitNoIndirectUniform);
      }

      /* Lower the pack/unpack built-ins the hardware can't do directly. */
      if (ctx->Extensions.ARB_shading_language_packing) {
         unsigned lower_inst = LOWER_PACK_SNORM_2x16 |
                               LOWER_UNPACK_SNORM_2x16 |
                               LOWER_PACK_UNORM_2x16 |
                               LOWER_UNPACK_UNORM_2x16 |
                               LOWER_PACK_SNORM_4x8 |
                               LOWER_UNPACK_SNORM_4x8 |
                               LOWER_UNPACK_UNORM_4x8 |
                               LOWER_PACK_UNORM_4x8;

         if (ctx->Extensions.ARB_gpu_shader5)
            lower_inst |= LOWER_PACK_USE_BFI |
                          LOWER_PACK_USE_BFE;
         if (!ctx->st->has_half_float_packing)
            lower_inst |= LOWER_PACK_HALF_2x16 |
                          LOWER_UNPACK_HALF_2x16;

         lower_packing_builtins(ir, lower_inst);
      }

      if (!pscreen->get_param(pscreen, PIPE_CAP_TEXTURE_GATHER_OFFSETS))
         lower_offset_arrays(ir);
      do_mat_op_to_vec(ir);
      lower_instructions(ir,
                         MOD_TO_FLOOR |
                         DIV_TO_MUL_RCP |
                         EXP_TO_EXP2 |
                         LOG_TO_LOG2 |
                         LDEXP_TO_ARITH |
                         (have_dfrexp ? 0 : DFREXP_DLDEXP_TO_ARITH) |
                         CARRY_TO_ARITH |
                         BORROW_TO_ARITH |
                         (have_dround ? 0 : DOPS_TO_DFRAC) |
                         (options->EmitNoPow ? POW_TO_EXP2 : 0) |
                         (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0) |
                         (options->EmitNoSat ? SAT_TO_CLAMP : 0) |
                         /* Assume that if ARB_gpu_shader5 is not supported
                          * then all of the extended integer functions need
                          * lowering.  It may be necessary to add some caps
                          * for individual instructions.
                          */
                         (!ctx->Extensions.ARB_gpu_shader5
                          ? BIT_COUNT_TO_MATH |
                            EXTRACT_TO_SHIFTS |
                            INSERT_TO_SHIFTS |
                            REVERSE_TO_SHIFTS |
                            FIND_LSB_TO_FLOAT_CAST |
                            FIND_MSB_TO_FLOAT_CAST |
                            IMUL_HIGH_TO_MUL
                          : 0));

      do_vec_index_to_cond_assign(ir);
      lower_vector_insert(ir, true);
      lower_quadop_vector(ir, false);
      lower_noise(ir);
      if (options->MaxIfDepth == 0) {
         lower_discard(ir);
      }

      /* Iterate jump lowering, common optimization and if-flattening
       * until no pass makes further progress.
       */
      do {
         progress = false;

         progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;

         progress = do_common_optimization(ir, true, true, options,
                                           ctx->Const.NativeIntegers)
            || progress;

         progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress;

      } while (progress);

      validate_ir_tree(ir);
   }

   build_program_resource_list(ctx, prog);

   /* Second pass: translate each lowered stage to a gl_program. */
   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      struct gl_program *linked_prog;

      if (prog->_LinkedShaders[i] == NULL)
         continue;

      linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);

      if (linked_prog) {
         _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
                                 linked_prog);
         /* Give the driver a chance to compile/validate; fail the whole
          * link if it rejects the program.
          */
         if (!ctx->Driver.ProgramStringNotify(ctx,
                                              _mesa_shader_stage_to_program(i),
                                              linked_prog)) {
            _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
                                    NULL);
            _mesa_reference_program(ctx, &linked_prog, NULL);
            return GL_FALSE;
         }
      }

      /* Drop the local reference; the shader holds its own. */
      _mesa_reference_program(ctx, &linked_prog, NULL);
   }

   return GL_TRUE;
}
6899
6900 void
6901 st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi,
6902 const GLuint outputMapping[],
6903 struct pipe_stream_output_info *so)
6904 {
6905 struct gl_transform_feedback_info *info =
6906 &glsl_to_tgsi->shader_program->LinkedTransformFeedback;
6907 st_translate_stream_output_info2(info, outputMapping, so);
6908 }
6909
6910 void
6911 st_translate_stream_output_info2(struct gl_transform_feedback_info *info,
6912 const GLuint outputMapping[],
6913 struct pipe_stream_output_info *so)
6914 {
6915 unsigned i;
6916
6917 for (i = 0; i < info->NumOutputs; i++) {
6918 so->output[i].register_index =
6919 outputMapping[info->Outputs[i].OutputRegister];
6920 so->output[i].start_component = info->Outputs[i].ComponentOffset;
6921 so->output[i].num_components = info->Outputs[i].NumComponents;
6922 so->output[i].output_buffer = info->Outputs[i].OutputBuffer;
6923 so->output[i].dst_offset = info->Outputs[i].DstOffset;
6924 so->output[i].stream = info->Outputs[i].StreamId;
6925 }
6926
6927 for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
6928 so->stride[i] = info->Buffers[i].Stride;
6929 }
6930 so->num_outputs = info->NumOutputs;
6931 }
6932
6933 } /* extern "C" */