st/glsl: move nir linking loop to new function st_link_nir()
[mesa.git] / src / mesa / state_tracker / st_glsl_to_tgsi.cpp
1 /*
2 * Copyright (C) 2005-2007 Brian Paul All Rights Reserved.
3 * Copyright (C) 2008 VMware, Inc. All Rights Reserved.
4 * Copyright © 2010 Intel Corporation
5 * Copyright © 2011 Bryan Cain
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 * DEALINGS IN THE SOFTWARE.
25 */
26
27 /**
28 * \file glsl_to_tgsi.cpp
29 *
30 * Translate GLSL IR to TGSI.
31 */
32
33 #include "st_glsl_to_tgsi.h"
34
35 #include "compiler/glsl/glsl_parser_extras.h"
36 #include "compiler/glsl/ir_optimization.h"
37 #include "compiler/glsl/program.h"
38
39 #include "main/errors.h"
40 #include "main/shaderobj.h"
41 #include "main/uniforms.h"
42 #include "main/shaderapi.h"
43 #include "main/shaderimage.h"
44 #include "program/prog_instruction.h"
45
46 #include "pipe/p_context.h"
47 #include "pipe/p_screen.h"
48 #include "tgsi/tgsi_ureg.h"
49 #include "tgsi/tgsi_info.h"
50 #include "util/u_math.h"
51 #include "util/u_memory.h"
52 #include "st_glsl_types.h"
53 #include "st_program.h"
54 #include "st_mesa_to_tgsi.h"
55 #include "st_format.h"
56 #include "st_nir.h"
57 #include "st_shader_cache.h"
58 #include "st_glsl_to_tgsi_temprename.h"
59
60 #include "util/hash_table.h"
61 #include <algorithm>
62
/* Bitmask with one bit set for each of the PROGRAM_STATE_VAR,
 * PROGRAM_CONSTANT and PROGRAM_UNIFORM register files.
 */
#define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) |    \
                           (1 << PROGRAM_CONSTANT) |     \
                           (1 << PROGRAM_UNIFORM))

/* NOTE(review): not referenced in this part of the file; presumably the
 * maximum number of texture offsets per instruction -- confirm at use site.
 */
#define MAX_GLSL_TEXTURE_OFFSET 4
68
69 static unsigned is_precise(const ir_variable *ir)
70 {
71 if (!ir)
72 return 0;
73 return ir->data.precise || ir->data.invariant;
74 }
75
76 class variable_storage {
77 DECLARE_RZALLOC_CXX_OPERATORS(variable_storage)
78
79 public:
80 variable_storage(ir_variable *var, gl_register_file file, int index,
81 unsigned array_id = 0)
82 : file(file), index(index), component(0), var(var), array_id(array_id)
83 {
84 assert(file != PROGRAM_ARRAY || array_id != 0);
85 }
86
87 gl_register_file file;
88 int index;
89
90 /* Explicit component location. This is given in terms of the GLSL-style
91 * swizzles where each double is a single component, i.e. for 64-bit types
92 * it can only be 0 or 1.
93 */
94 int component;
95 ir_variable *var; /* variable that maps to this, if any */
96 unsigned array_id;
97 };
98
99 class immediate_storage : public exec_node {
100 public:
101 immediate_storage(gl_constant_value *values, int size32, int type)
102 {
103 memcpy(this->values, values, size32 * sizeof(gl_constant_value));
104 this->size32 = size32;
105 this->type = type;
106 }
107
108 /* doubles are stored across 2 gl_constant_values */
109 gl_constant_value values[4];
110 int size32; /**< Number of 32-bit components (1-4) */
111 int type; /**< GL_DOUBLE, GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
112 };
113
/* Placeholder registers in the PROGRAM_UNDEFINED file; used below as the
 * default value for emit_asm() operands that are not supplied.
 */
static const st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
static const st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
116
/* Element type of the visitor's inputs[] and outputs[] tables. */
struct inout_decl {
   unsigned mesa_index;
   unsigned array_id; /* TGSI ArrayID; 1-based: 0 means not an array */
   unsigned size;
   unsigned interp_loc;
   unsigned gs_out_streams;
   enum glsl_interp_mode interp;
   enum glsl_base_type base_type;
   ubyte usage_mask; /* GLSL-style usage-mask, i.e. single bit per double */
};
127
128 static struct inout_decl *
129 find_inout_array(struct inout_decl *decls, unsigned count, unsigned array_id)
130 {
131 assert(array_id != 0);
132
133 for (unsigned i = 0; i < count; i++) {
134 struct inout_decl *decl = &decls[i];
135
136 if (array_id == decl->array_id) {
137 return decl;
138 }
139 }
140
141 return NULL;
142 }
143
144 static enum glsl_base_type
145 find_array_type(struct inout_decl *decls, unsigned count, unsigned array_id)
146 {
147 if (!array_id)
148 return GLSL_TYPE_ERROR;
149 struct inout_decl *decl = find_inout_array(decls, count, array_id);
150 if (decl)
151 return decl->base_type;
152 return GLSL_TYPE_ERROR;
153 }
154
/* Element type of the visitor's atomic_info[] table. */
struct hwatomic_decl {
   unsigned location;
   unsigned binding;
   unsigned size;
   unsigned array_id;
};
161
/**
 * ir_visitor implementation that walks GLSL IR and accumulates a list of
 * glsl_to_tgsi_instruction objects, together with the bookkeeping needed
 * for them: temporaries, arrays, immediates, in/out declarations, and
 * sampler/image usage.
 */
struct glsl_to_tgsi_visitor : public ir_visitor {
public:
   glsl_to_tgsi_visitor();
   ~glsl_to_tgsi_visitor();

   struct gl_context *ctx;
   struct gl_program *prog;
   struct gl_shader_program *shader_program;
   struct gl_linked_shader *shader;
   struct gl_shader_compiler_options *options;

   /* Index of the next free PROGRAM_TEMPORARY register (see get_temp()). */
   int next_temp;

   /* Growable table of PROGRAM_ARRAY sizes; entry i belongs to array id
    * i + 1 (see get_temp()).
    */
   unsigned *array_sizes;
   unsigned max_num_arrays;
   unsigned next_array;

   struct inout_decl inputs[4 * PIPE_MAX_SHADER_INPUTS];
   unsigned num_inputs;
   unsigned num_input_arrays;
   struct inout_decl outputs[4 * PIPE_MAX_SHADER_OUTPUTS];
   unsigned num_outputs;
   unsigned num_output_arrays;

   struct hwatomic_decl atomic_info[PIPE_MAX_HW_ATOMIC_BUFFERS];
   unsigned num_atomics;
   unsigned num_atomic_arrays;
   /* One more than the highest address register index emitted so far
    * (updated in emit_arl()).
    */
   int num_address_regs;
   uint32_t samplers_used;
   glsl_base_type sampler_types[PIPE_MAX_SAMPLERS];
   enum tgsi_texture_type sampler_targets[PIPE_MAX_SAMPLERS];
   int images_used;
   int image_targets[PIPE_MAX_SHADER_IMAGES];
   enum pipe_format image_formats[PIPE_MAX_SHADER_IMAGES];
   /* Set by emit_asm() when a constant-file register is addressed
    * indirectly.
    */
   bool indirect_addr_consts;
   int wpos_transform_const;

   bool native_integers;
   bool have_sqrt;
   bool have_fma;
   bool use_shared_memory;
   bool has_tex_txf_lz;
   /* Copied into every instruction created by emit_asm(). */
   bool precise;
   bool need_uarl;

   variable_storage *find_variable_storage(ir_variable *var);

   int add_constant(gl_register_file file, gl_constant_value values[8],
                    int size, int datatype, uint16_t *swizzle_out);

   st_src_reg get_temp(const glsl_type *type);
   void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);

   st_src_reg st_src_reg_for_double(double val);
   st_src_reg st_src_reg_for_float(float val);
   st_src_reg st_src_reg_for_int(int val);
   st_src_reg st_src_reg_for_int64(int64_t val);
   st_src_reg st_src_reg_for_type(enum glsl_base_type type, int val);

   /**
    * \name Visit methods
    *
    * As typical for the visitor pattern, there must be one \c visit method for
    * each concrete subclass of \c ir_instruction.  Virtual base classes within
    * the hierarchy should not have \c visit methods.
    */
   /*@{*/
   virtual void visit(ir_variable *);
   virtual void visit(ir_loop *);
   virtual void visit(ir_loop_jump *);
   virtual void visit(ir_function_signature *);
   virtual void visit(ir_function *);
   virtual void visit(ir_expression *);
   virtual void visit(ir_swizzle *);
   virtual void visit(ir_dereference_variable *);
   virtual void visit(ir_dereference_array *);
   virtual void visit(ir_dereference_record *);
   virtual void visit(ir_assignment *);
   virtual void visit(ir_constant *);
   virtual void visit(ir_call *);
   virtual void visit(ir_return *);
   virtual void visit(ir_discard *);
   virtual void visit(ir_texture *);
   virtual void visit(ir_if *);
   virtual void visit(ir_emit_vertex *);
   virtual void visit(ir_end_primitive *);
   virtual void visit(ir_barrier *);
   /*@}*/

   void visit_expression(ir_expression *, st_src_reg *) ATTRIBUTE_NOINLINE;

   void visit_atomic_counter_intrinsic(ir_call *);
   void visit_ssbo_intrinsic(ir_call *);
   void visit_membar_intrinsic(ir_call *);
   void visit_shared_intrinsic(ir_call *);
   void visit_image_intrinsic(ir_call *);
   void visit_generic_intrinsic(ir_call *, unsigned op);

   st_src_reg result;

   /** Hash table mapping ir_variable to variable_storage */
   struct hash_table *variables;

   /** List of immediate_storage */
   exec_list immediates;
   unsigned num_immediates;

   /** List of glsl_to_tgsi_instruction */
   exec_list instructions;

   /* Single-destination form; forwards to the two-destination overload
    * with undef_dst as the second destination.
    */
   glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op,
                                      st_dst_reg dst = undef_dst,
                                      st_src_reg src0 = undef_src,
                                      st_src_reg src1 = undef_src,
                                      st_src_reg src2 = undef_src,
                                      st_src_reg src3 = undef_src);

   glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, unsigned op,
                                      st_dst_reg dst, st_dst_reg dst1,
                                      st_src_reg src0 = undef_src,
                                      st_src_reg src1 = undef_src,
                                      st_src_reg src2 = undef_src,
                                      st_src_reg src3 = undef_src);

   unsigned get_opcode(unsigned op,
                       st_dst_reg dst,
                       st_src_reg src0, st_src_reg src1);

   /**
    * Emit the correct dot-product instruction for the type of arguments
    */
   glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir,
                                     st_dst_reg dst,
                                     st_src_reg src0,
                                     st_src_reg src1,
                                     unsigned elements);

   void emit_scalar(ir_instruction *ir, unsigned op,
                    st_dst_reg dst, st_src_reg src0);

   void emit_scalar(ir_instruction *ir, unsigned op,
                    st_dst_reg dst, st_src_reg src0, st_src_reg src1);

   void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);

   void get_deref_offsets(ir_dereference *ir,
                          unsigned *array_size,
                          unsigned *base,
                          uint16_t *index,
                          st_src_reg *reladdr,
                          bool opaque);
   void calc_deref_offsets(ir_dereference *tail,
                           unsigned *array_elements,
                           uint16_t *index,
                           st_src_reg *indirect,
                           unsigned *location);
   st_src_reg canonicalize_gather_offset(st_src_reg offset);

   bool try_emit_mad(ir_expression *ir,
                     int mul_operand);
   bool try_emit_mad_for_and_not(ir_expression *ir,
                                 int mul_operand);

   void emit_swz(ir_expression *ir);

   bool process_move_condition(ir_rvalue *ir);

   void simplify_cmp(void);

   void rename_temp_registers(struct rename_reg_pair *renames);
   void get_first_temp_read(int *first_reads);
   void get_first_temp_write(int *first_writes);
   void get_last_temp_read_first_temp_write(int *last_reads, int *first_writes);
   void get_last_temp_write(int *last_writes);

   void copy_propagate(void);
   int eliminate_dead_code(void);

   void merge_two_dsts(void);
   void merge_registers(void);
   void renumber_registers(void);

   void emit_block_mov(ir_assignment *ir, const struct glsl_type *type,
                       st_dst_reg *l, st_src_reg *r,
                       st_src_reg *cond, bool cond_swap);

   /* Allocation context passed to placement-new for instructions and
    * immediates created by this visitor.
    */
   void *mem_ctx;
};
350
/* Destination registers in the PROGRAM_ADDRESS file, indices 0, 1 and 2.
 * emit_asm() loads the first from a destination's reladdr and the second
 * from its reladdr2.
 */
static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 0);
static st_dst_reg address_reg2 = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 1);
static st_dst_reg sampler_reladdr = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 2);
354
355 static void
356 fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
357
358 static void
359 fail_link(struct gl_shader_program *prog, const char *fmt, ...)
360 {
361 va_list args;
362 va_start(args, fmt);
363 ralloc_vasprintf_append(&prog->data->InfoLog, fmt, args);
364 va_end(args);
365
366 prog->data->LinkStatus = linking_failure;
367 }
368
369 int
370 swizzle_for_size(int size)
371 {
372 static const int size_swizzles[4] = {
373 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
374 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
375 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
376 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
377 };
378
379 assert((size >= 1) && (size <= 4));
380 return size_swizzles[size - 1];
381 }
382
383
/**
 * Create an instruction with up to two destinations and four sources and
 * append it to this->instructions.
 *
 * The opcode is first specialised by get_opcode().  Relative addressing
 * on the sources is resolved through reladdr_to_temp(), and on the first
 * destination by loading the address registers with emit_arl().  When a
 * destination or src0 is a 64-bit type the instruction is expanded into
 * one instruction per enabled writemask channel.
 *
 * \return the last instruction appended.
 */
glsl_to_tgsi_instruction *
glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
                               st_dst_reg dst, st_dst_reg dst1,
                               st_src_reg src0, st_src_reg src1,
                               st_src_reg src2, st_src_reg src3)
{
   glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
   int num_reladdr = 0, i, j;
   bool dst_is_64bit[2];

   op = get_opcode(op, dst, src0, src1);

   /* If we have to do relative addressing, we want to load the ARL
    * reg directly for one of the regs, and preload the other reladdr
    * sources into temps.
    */
   num_reladdr += dst.reladdr != NULL || dst.reladdr2;
   assert(!dst1.reladdr); /* should be lowered in earlier passes */
   num_reladdr += src0.reladdr != NULL || src0.reladdr2 != NULL;
   num_reladdr += src1.reladdr != NULL || src1.reladdr2 != NULL;
   num_reladdr += src2.reladdr != NULL || src2.reladdr2 != NULL;
   num_reladdr += src3.reladdr != NULL || src3.reladdr2 != NULL;

   reladdr_to_temp(ir, &src3, &num_reladdr);
   reladdr_to_temp(ir, &src2, &num_reladdr);
   reladdr_to_temp(ir, &src1, &num_reladdr);
   reladdr_to_temp(ir, &src0, &num_reladdr);

   if (dst.reladdr || dst.reladdr2) {
      if (dst.reladdr)
         emit_arl(ir, address_reg, *dst.reladdr);
      if (dst.reladdr2)
         emit_arl(ir, address_reg2, *dst.reladdr2);
      num_reladdr--;
   }

   /* Every relative address counted above must have been accounted for. */
   assert(num_reladdr == 0);

   /* inst->op has only 8 bits. */
   STATIC_ASSERT(TGSI_OPCODE_LAST <= 255);

   inst->op = op;
   inst->precise = this->precise;
   inst->info = tgsi_get_opcode_info(op);
   inst->dst[0] = dst;
   inst->dst[1] = dst1;
   inst->src[0] = src0;
   inst->src[1] = src1;
   inst->src[2] = src2;
   inst->src[3] = src3;
   inst->is_64bit_expanded = false;
   inst->ir = ir;
   inst->dead_mask = 0;
   inst->tex_offsets = NULL;
   inst->tex_offset_num_offset = 0;
   inst->saturate = 0;
   inst->tex_shadow = 0;
   /* default to float, for paths where this is not initialized
    * (since 0==UINT which is likely wrong):
    */
   inst->tex_type = GLSL_TYPE_FLOAT;

   /* Update indirect addressing status used by TGSI */
   if (dst.reladdr || dst.reladdr2) {
      switch(dst.file) {
      case PROGRAM_STATE_VAR:
      case PROGRAM_CONSTANT:
      case PROGRAM_UNIFORM:
         this->indirect_addr_consts = true;
         break;
      case PROGRAM_IMMEDIATE:
         assert(!"immediates should not have indirect addressing");
         break;
      default:
         break;
      }
   }
   else {
      /* Sources are only inspected when the destination is not
       * relatively addressed, and only their first reladdr is checked.
       */
      for (i = 0; i < 4; i++) {
         if(inst->src[i].reladdr) {
            switch(inst->src[i].file) {
            case PROGRAM_STATE_VAR:
            case PROGRAM_CONSTANT:
            case PROGRAM_UNIFORM:
               this->indirect_addr_consts = true;
               break;
            case PROGRAM_IMMEDIATE:
               assert(!"immediates should not have indirect addressing");
               break;
            default:
               break;
            }
         }
      }
   }

   /*
    * This section contains the double processing.
    * GLSL just represents doubles as single channel values,
    * however most HW and TGSI represent doubles as pairs of register channels.
    *
    * So we have to fix up the destination writemask/index and the source
    * swizzle/indexes.
    * Dest writemasks need to translate from a single-channel writemask
    * to a dual-channel writemask, but we also need to modify the index
    * if we are touching the Z,W fields in the pre-translated writemask.
    *
    * Src channels have similar index modifications along with swizzle
    * changes so that we pick the XY, ZW pairs from the correct index.
    *
    * GLSL [0].x -> TGSI [0].xy
    * GLSL [0].y -> TGSI [0].zw
    * GLSL [0].z -> TGSI [1].xy
    * GLSL [0].w -> TGSI [1].zw
    */
   for (j = 0; j < 2; j++) {
      dst_is_64bit[j] = glsl_base_type_is_64bit(inst->dst[j].type);
      /* An output array reports GLSL_TYPE_ARRAY; consult the recorded
       * declaration to learn whether its elements are 64-bit.
       */
      if (!dst_is_64bit[j] && inst->dst[j].file == PROGRAM_OUTPUT && inst->dst[j].type == GLSL_TYPE_ARRAY) {
         enum glsl_base_type type = find_array_type(this->outputs, this->num_outputs, inst->dst[j].array_id);
         if (glsl_base_type_is_64bit(type))
            dst_is_64bit[j] = true;
      }
   }

   if (dst_is_64bit[0] || dst_is_64bit[1] ||
       glsl_base_type_is_64bit(inst->src[0].type)) {
      glsl_to_tgsi_instruction *dinst = NULL;
      int initial_src_swz[4], initial_src_idx[4];
      int initial_dst_idx[2], initial_dst_writemask[2];
      /* select the writemask for dst0 or dst1 */
      unsigned writemask = inst->dst[1].file == PROGRAM_UNDEFINED ? inst->dst[0].writemask : inst->dst[1].writemask;

      /* copy out the writemask, index and swizzles for all src/dsts. */
      for (j = 0; j < 2; j++) {
         initial_dst_writemask[j] = inst->dst[j].writemask;
         initial_dst_idx[j] = inst->dst[j].index;
      }

      for (j = 0; j < 4; j++) {
         initial_src_swz[j] = inst->src[j].swizzle;
         initial_src_idx[j] = inst->src[j].index;
      }

      /*
       * scan all the components in the dst writemask
       * generate an instruction for each of them if required.
       */
      st_src_reg addr;
      while (writemask) {

         /* Note: this 'i' shadows the function-level 'i'. */
         int i = u_bit_scan(&writemask);

         /* before emitting the instruction, see if we have to adjust load / store
          * address */
         if (i > 1 && (inst->op == TGSI_OPCODE_LOAD || inst->op == TGSI_OPCODE_STORE) &&
             addr.file == PROGRAM_UNDEFINED) {
            /* We have to advance the buffer address by 16 */
            addr = get_temp(glsl_type::uint_type);
            emit_asm(ir, TGSI_OPCODE_UADD, st_dst_reg(addr),
                     inst->src[0], st_src_reg_for_int(16));
         }

         /* first time use previous instruction */
         if (dinst == NULL) {
            dinst = inst;
         } else {
            /* create a new instruction for subsequent channels */
            dinst = new(mem_ctx) glsl_to_tgsi_instruction();
            *dinst = *inst;
            dinst->next = NULL;
            dinst->prev = NULL;
         }
         this->instructions.push_tail(dinst);
         dinst->is_64bit_expanded = true;

         /* modify the destination if we are splitting */
         for (j = 0; j < 2; j++) {
            if (dst_is_64bit[j]) {
               dinst->dst[j].writemask = (i & 1) ? WRITEMASK_ZW : WRITEMASK_XY;
               dinst->dst[j].index = initial_dst_idx[j];
               if (i > 1) {
                  if (dinst->op == TGSI_OPCODE_LOAD || dinst->op == TGSI_OPCODE_STORE)
                     dinst->src[0] = addr;
                  if (dinst->op != TGSI_OPCODE_STORE)
                     dinst->dst[j].index++;
               }
            } else {
               /* if we aren't writing to a double, just get the bit of the initial writemask
                  for this channel */
               dinst->dst[j].writemask = initial_dst_writemask[j] & (1 << i);
            }
         }

         /* modify the src registers */
         for (j = 0; j < 4; j++) {
            int swz = GET_SWZ(initial_src_swz[j], i);

            if (glsl_base_type_is_64bit(dinst->src[j].type)) {
               dinst->src[j].index = initial_src_idx[j];
               if (swz > 1) {
                  dinst->src[j].double_reg2 = true;
                  dinst->src[j].index++;
               }

               if (swz & 1)
                  dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
               else
                  dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);

            } else {
               /* some opcodes are special case in what they use as sources
                  - [FUI]2D/[UI]2I64 is a float/[u]int src0, (D)LDEXP is integer src1 */
               if (op == TGSI_OPCODE_F2D || op == TGSI_OPCODE_U2D || op == TGSI_OPCODE_I2D ||
                   op == TGSI_OPCODE_I2I64 || op == TGSI_OPCODE_U2I64 ||
                   op == TGSI_OPCODE_DLDEXP || op == TGSI_OPCODE_LDEXP ||
                   (op == TGSI_OPCODE_UCMP && dst_is_64bit[0])) {
                  dinst->src[j].swizzle = MAKE_SWIZZLE4(swz, swz, swz, swz);
               }
            }
         }
      }
      /* Return the last instruction of the expansion. */
      inst = dinst;
   } else {
      this->instructions.push_tail(inst);
   }


   return inst;
}
612
613 glsl_to_tgsi_instruction *
614 glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
615 st_dst_reg dst,
616 st_src_reg src0, st_src_reg src1,
617 st_src_reg src2, st_src_reg src3)
618 {
619 return emit_asm(ir, op, dst, undef_dst, src0, src1, src2, src3);
620 }
621
/**
 * Determines whether to use an integer, unsigned integer, float, double or
 * 64-bit integer opcode based on the operand types and the input opcode,
 * and returns the selected opcode.  Nothing is emitted here.
 *
 * TGSI_OPCODE_MOV is returned unchanged.  Opcodes not listed in the
 * switch below are returned unchanged as well.
 */
unsigned
glsl_to_tgsi_visitor::get_opcode(unsigned op,
                                 st_dst_reg dst,
                                 st_src_reg src0, st_src_reg src1)
{
   enum glsl_base_type type = GLSL_TYPE_FLOAT;

   if (op == TGSI_OPCODE_MOV)
      return op;

   assert(src0.type != GLSL_TYPE_ARRAY);
   assert(src0.type != GLSL_TYPE_STRUCT);
   assert(src1.type != GLSL_TYPE_ARRAY);
   assert(src1.type != GLSL_TYPE_STRUCT);

   /* Pick the operating type: resource instructions take it from src1,
    * otherwise the first match among int64, uint64, double and float on
    * either source wins; with native integers the type of src0 is used
    * (bool is treated as int).
    */
   if (is_resource_instruction(op))
      type = src1.type;
   else if (src0.type == GLSL_TYPE_INT64 || src1.type == GLSL_TYPE_INT64)
      type = GLSL_TYPE_INT64;
   else if (src0.type == GLSL_TYPE_UINT64 || src1.type == GLSL_TYPE_UINT64)
      type = GLSL_TYPE_UINT64;
   else if (src0.type == GLSL_TYPE_DOUBLE || src1.type == GLSL_TYPE_DOUBLE)
      type = GLSL_TYPE_DOUBLE;
   else if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
      type = GLSL_TYPE_FLOAT;
   else if (native_integers)
      type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type;

/* Maps opcode 'c' to the variant for the selected type; any type other
 * than uint64/int64/double/int/uint falls back to the float variant 'f'.
 */
#define case7(c, f, i, u, d, i64, ui64)             \
   case TGSI_OPCODE_##c:                            \
      if (type == GLSL_TYPE_UINT64)                 \
         op = TGSI_OPCODE_##ui64;                   \
      else if (type == GLSL_TYPE_INT64)             \
         op = TGSI_OPCODE_##i64;                    \
      else if (type == GLSL_TYPE_DOUBLE)            \
         op = TGSI_OPCODE_##d;                      \
      else if (type == GLSL_TYPE_INT)               \
         op = TGSI_OPCODE_##i;                      \
      else if (type == GLSL_TYPE_UINT)              \
         op = TGSI_OPCODE_##u;                      \
      else                                          \
         op = TGSI_OPCODE_##f;                      \
      break;

/* Comparison variant: subroutine types use the integer opcode, and
 * without native integers the original opcode 'c' is kept for the
 * remaining types.
 */
#define casecomp(c, f, i, u, d, i64, ui64)          \
   case TGSI_OPCODE_##c:                            \
      if (type == GLSL_TYPE_INT64)                  \
         op = TGSI_OPCODE_##i64;                    \
      else if (type == GLSL_TYPE_UINT64)            \
         op = TGSI_OPCODE_##ui64;                   \
      else if (type == GLSL_TYPE_DOUBLE)            \
         op = TGSI_OPCODE_##d;                      \
      else if (type == GLSL_TYPE_INT || type == GLSL_TYPE_SUBROUTINE) \
         op = TGSI_OPCODE_##i;                      \
      else if (type == GLSL_TYPE_UINT)              \
         op = TGSI_OPCODE_##u;                      \
      else if (native_integers)                     \
         op = TGSI_OPCODE_##f;                      \
      else                                          \
         op = TGSI_OPCODE_##c;                      \
      break;

   switch(op) {
      /* Some instructions are initially selected without considering the type.
       * This fixes the type:
       *
       *    INIT     FLOAT SINT     UINT     DOUBLE   SINT64   UINT64
       *
       * A LAST entry marks a combination that must not occur; it trips
       * the assert at the end of this function.
       */
      case7(ADD,     ADD,  UADD,    UADD,    DADD,    U64ADD,  U64ADD);
      case7(CEIL,    CEIL, LAST,    LAST,    DCEIL,   LAST,    LAST);
      case7(DIV,     DIV,  IDIV,    UDIV,    DDIV,    I64DIV,  U64DIV);
      case7(FMA,     FMA,  UMAD,    UMAD,    DFMA,    LAST,    LAST);
      case7(FLR,     FLR,  LAST,    LAST,    DFLR,    LAST,    LAST);
      case7(FRC,     FRC,  LAST,    LAST,    DFRAC,   LAST,    LAST);
      case7(MUL,     MUL,  UMUL,    UMUL,    DMUL,    U64MUL,  U64MUL);
      case7(MAD,     MAD,  UMAD,    UMAD,    DMAD,    LAST,    LAST);
      case7(MAX,     MAX,  IMAX,    UMAX,    DMAX,    I64MAX,  U64MAX);
      case7(MIN,     MIN,  IMIN,    UMIN,    DMIN,    I64MIN,  U64MIN);
      case7(RCP,     RCP,  LAST,    LAST,    DRCP,    LAST,    LAST);
      case7(ROUND,   ROUND,LAST,    LAST,    DROUND,  LAST,    LAST);
      case7(RSQ,     RSQ,  LAST,    LAST,    DRSQ,    LAST,    LAST);
      case7(SQRT,    SQRT, LAST,    LAST,    DSQRT,   LAST,    LAST);
      case7(SSG,     SSG,  ISSG,    ISSG,    DSSG,    I64SSG,  I64SSG);
      case7(TRUNC,   TRUNC,LAST,    LAST,    DTRUNC,  LAST,    LAST);

      case7(MOD,     LAST, MOD,     UMOD,    LAST,    I64MOD,  U64MOD);
      case7(SHL,     LAST, SHL,     SHL,     LAST,    U64SHL,  U64SHL);
      case7(IBFE,    LAST, IBFE,    UBFE,    LAST,    LAST,    LAST);
      case7(IMSB,    LAST, IMSB,    UMSB,    LAST,    LAST,    LAST);
      case7(IMUL_HI, LAST, IMUL_HI, UMUL_HI, LAST,    LAST,    LAST);
      case7(ISHR,    LAST, ISHR,    USHR,    LAST,    I64SHR,  U64SHR);
      case7(ATOMIMAX,LAST, ATOMIMAX,ATOMUMAX,LAST,    LAST,    LAST);
      case7(ATOMIMIN,LAST, ATOMIMIN,ATOMUMIN,LAST,    LAST,    LAST);

      casecomp(SEQ, FSEQ, USEQ, USEQ, DSEQ, U64SEQ, U64SEQ);
      casecomp(SNE, FSNE, USNE, USNE, DSNE, U64SNE, U64SNE);
      casecomp(SGE, FSGE, ISGE, USGE, DSGE, I64SGE, U64SGE);
      casecomp(SLT, FSLT, ISLT, USLT, DSLT, I64SLT, U64SLT);

      default: break;
   }

   assert(op != TGSI_OPCODE_LAST);
   return op;
}
731
732 glsl_to_tgsi_instruction *
733 glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
734 st_dst_reg dst, st_src_reg src0, st_src_reg src1,
735 unsigned elements)
736 {
737 static const unsigned dot_opcodes[] = {
738 TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
739 };
740
741 return emit_asm(ir, dot_opcodes[elements - 2], dst, src0, src1);
742 }
743
744 /**
745 * Emits TGSI scalar opcodes to produce unique answers across channels.
746 *
747 * Some TGSI opcodes are scalar-only, like ARB_fp/vp. The src X
748 * channel determines the result across all channels. So to do a vec4
749 * of this operation, we want to emit a scalar per source channel used
750 * to produce dest channels.
751 */
752 void
753 glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
754 st_dst_reg dst,
755 st_src_reg orig_src0, st_src_reg orig_src1)
756 {
757 int i, j;
758 int done_mask = ~dst.writemask;
759
760 /* TGSI RCP is a scalar operation splatting results to all channels,
761 * like ARB_fp/vp. So emit as many RCPs as necessary to cover our
762 * dst channels.
763 */
764 for (i = 0; i < 4; i++) {
765 GLuint this_mask = (1 << i);
766 st_src_reg src0 = orig_src0;
767 st_src_reg src1 = orig_src1;
768
769 if (done_mask & this_mask)
770 continue;
771
772 GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
773 GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
774 for (j = i + 1; j < 4; j++) {
775 /* If there is another enabled component in the destination that is
776 * derived from the same inputs, generate its value on this pass as
777 * well.
778 */
779 if (!(done_mask & (1 << j)) &&
780 GET_SWZ(src0.swizzle, j) == src0_swiz &&
781 GET_SWZ(src1.swizzle, j) == src1_swiz) {
782 this_mask |= (1 << j);
783 }
784 }
785 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
786 src0_swiz, src0_swiz);
787 src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
788 src1_swiz, src1_swiz);
789
790 dst.writemask = this_mask;
791 emit_asm(ir, op, dst, src0, src1);
792 done_mask |= this_mask;
793 }
794 }
795
796 void
797 glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
798 st_dst_reg dst, st_src_reg src0)
799 {
800 st_src_reg undef = undef_src;
801
802 undef.swizzle = SWIZZLE_XXXX;
803
804 emit_scalar(ir, op, dst, src0, undef);
805 }
806
807 void
808 glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
809 st_dst_reg dst, st_src_reg src0)
810 {
811 int op = TGSI_OPCODE_ARL;
812
813 if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT) {
814 if (!this->need_uarl && src0.is_legal_tgsi_address_operand())
815 return;
816
817 op = TGSI_OPCODE_UARL;
818 }
819
820 assert(dst.file == PROGRAM_ADDRESS);
821 if (dst.index >= this->num_address_regs)
822 this->num_address_regs = dst.index + 1;
823
824 emit_asm(NULL, op, dst, src0);
825 }
826
827 int
828 glsl_to_tgsi_visitor::add_constant(gl_register_file file,
829 gl_constant_value values[8], int size, int datatype,
830 uint16_t *swizzle_out)
831 {
832 if (file == PROGRAM_CONSTANT) {
833 GLuint swizzle = swizzle_out ? *swizzle_out : 0;
834 int result = _mesa_add_typed_unnamed_constant(this->prog->Parameters, values,
835 size, datatype, &swizzle);
836 if (swizzle_out)
837 *swizzle_out = swizzle;
838 return result;
839 }
840
841 assert(file == PROGRAM_IMMEDIATE);
842
843 int index = 0;
844 immediate_storage *entry;
845 int size32 = size * ((datatype == GL_DOUBLE ||
846 datatype == GL_INT64_ARB ||
847 datatype == GL_UNSIGNED_INT64_ARB)? 2 : 1);
848 int i;
849
850 /* Search immediate storage to see if we already have an identical
851 * immediate that we can use instead of adding a duplicate entry.
852 */
853 foreach_in_list(immediate_storage, entry, &this->immediates) {
854 immediate_storage *tmp = entry;
855
856 for (i = 0; i * 4 < size32; i++) {
857 int slot_size = MIN2(size32 - (i * 4), 4);
858 if (tmp->type != datatype || tmp->size32 != slot_size)
859 break;
860 if (memcmp(tmp->values, &values[i * 4],
861 slot_size * sizeof(gl_constant_value)))
862 break;
863
864 /* Everything matches, keep going until the full size is matched */
865 tmp = (immediate_storage *)tmp->next;
866 }
867
868 /* The full value matched */
869 if (i * 4 >= size32)
870 return index;
871
872 index++;
873 }
874
875 for (i = 0; i * 4 < size32; i++) {
876 int slot_size = MIN2(size32 - (i * 4), 4);
877 /* Add this immediate to the list. */
878 entry = new(mem_ctx) immediate_storage(&values[i * 4], slot_size, datatype);
879 this->immediates.push_tail(entry);
880 this->num_immediates++;
881 }
882 return index;
883 }
884
885 st_src_reg
886 glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
887 {
888 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT);
889 union gl_constant_value uval;
890
891 uval.f = val;
892 src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle);
893
894 return src;
895 }
896
897 st_src_reg
898 glsl_to_tgsi_visitor::st_src_reg_for_double(double val)
899 {
900 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_DOUBLE);
901 union gl_constant_value uval[2];
902
903 memcpy(uval, &val, sizeof(uval));
904 src.index = add_constant(src.file, uval, 1, GL_DOUBLE, &src.swizzle);
905 src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
906 return src;
907 }
908
909 st_src_reg
910 glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
911 {
912 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
913 union gl_constant_value uval;
914
915 assert(native_integers);
916
917 uval.i = val;
918 src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle);
919
920 return src;
921 }
922
923 st_src_reg
924 glsl_to_tgsi_visitor::st_src_reg_for_int64(int64_t val)
925 {
926 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT64);
927 union gl_constant_value uval[2];
928
929 memcpy(uval, &val, sizeof(uval));
930 src.index = add_constant(src.file, uval, 1, GL_DOUBLE, &src.swizzle);
931 src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
932
933 return src;
934 }
935
936 st_src_reg
937 glsl_to_tgsi_visitor::st_src_reg_for_type(enum glsl_base_type type, int val)
938 {
939 if (native_integers)
940 return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) :
941 st_src_reg_for_int(val);
942 else
943 return st_src_reg_for_float(val);
944 }
945
946 static int
947 attrib_type_size(const struct glsl_type *type, bool is_vs_input)
948 {
949 return type->count_attribute_slots(is_vs_input);
950 }
951
952 static int
953 type_size(const struct glsl_type *type)
954 {
955 return type->count_attribute_slots(false);
956 }
957
958 static void
959 add_buffer_to_load_and_stores(glsl_to_tgsi_instruction *inst, st_src_reg *buf,
960 exec_list *instructions, ir_constant *access)
961 {
962 /**
963 * emit_asm() might have actually split the op into pieces, e.g. for
964 * double stores. We have to go back and fix up all the generated ops.
965 */
966 unsigned op = inst->op;
967 do {
968 inst->resource = *buf;
969 if (access)
970 inst->buffer_access = access->value.u[0];
971
972 if (inst == instructions->get_head_raw())
973 break;
974 inst = (glsl_to_tgsi_instruction *)inst->get_prev();
975
976 if (inst->op == TGSI_OPCODE_UADD) {
977 if (inst == instructions->get_head_raw())
978 break;
979 inst = (glsl_to_tgsi_instruction *)inst->get_prev();
980 }
981 } while (inst->op == op && inst->resource.file == PROGRAM_UNDEFINED);
982 }
983
984 /**
985 * If the given GLSL type is an array or matrix or a structure containing
986 * an array/matrix member, return true. Else return false.
987 *
988 * This is used to determine which kind of temp storage (PROGRAM_TEMPORARY
989 * or PROGRAM_ARRAY) should be used for variables of this type. Anytime
990 * we have an array that might be indexed with a variable, we need to use
991 * the later storage type.
992 */
993 static bool
994 type_has_array_or_matrix(const glsl_type *type)
995 {
996 if (type->is_array() || type->is_matrix())
997 return true;
998
999 if (type->is_record()) {
1000 for (unsigned i = 0; i < type->length; i++) {
1001 if (type_has_array_or_matrix(type->fields.structure[i].type)) {
1002 return true;
1003 }
1004 }
1005 }
1006
1007 return false;
1008 }
1009
1010
1011 /**
1012 * In the initial pass of codegen, we assign temporary numbers to
1013 * intermediate results. (not SSA -- variable assignments will reuse
1014 * storage).
1015 */
1016 st_src_reg
1017 glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
1018 {
1019 st_src_reg src;
1020
1021 src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
1022 src.reladdr = NULL;
1023 src.negate = 0;
1024 src.abs = 0;
1025
1026 if (!options->EmitNoIndirectTemp && type_has_array_or_matrix(type)) {
1027 if (next_array >= max_num_arrays) {
1028 max_num_arrays += 32;
1029 array_sizes = (unsigned*)
1030 realloc(array_sizes, sizeof(array_sizes[0]) * max_num_arrays);
1031 }
1032
1033 src.file = PROGRAM_ARRAY;
1034 src.index = 0;
1035 src.array_id = next_array + 1;
1036 array_sizes[next_array] = type_size(type);
1037 ++next_array;
1038
1039 } else {
1040 src.file = PROGRAM_TEMPORARY;
1041 src.index = next_temp;
1042 next_temp += type_size(type);
1043 }
1044
1045 if (type->is_array() || type->is_record()) {
1046 src.swizzle = SWIZZLE_NOOP;
1047 } else {
1048 src.swizzle = swizzle_for_size(type->vector_elements);
1049 }
1050
1051 return src;
1052 }
1053
1054 variable_storage *
1055 glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
1056 {
1057 struct hash_entry *entry;
1058
1059 entry = _mesa_hash_table_search(this->variables, var);
1060 if (!entry)
1061 return NULL;
1062
1063 return (variable_storage *)entry->data;
1064 }
1065
1066 void
1067 glsl_to_tgsi_visitor::visit(ir_variable *ir)
1068 {
1069 if (strcmp(ir->name, "gl_FragCoord") == 0) {
1070 this->prog->OriginUpperLeft = ir->data.origin_upper_left;
1071 this->prog->PixelCenterInteger = ir->data.pixel_center_integer;
1072 }
1073
1074 if (ir->data.mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
1075 unsigned int i;
1076 const ir_state_slot *const slots = ir->get_state_slots();
1077 assert(slots != NULL);
1078
1079 /* Check if this statevar's setup in the STATE file exactly
1080 * matches how we'll want to reference it as a
1081 * struct/array/whatever. If not, then we need to move it into
1082 * temporary storage and hope that it'll get copy-propagated
1083 * out.
1084 */
1085 for (i = 0; i < ir->get_num_state_slots(); i++) {
1086 if (slots[i].swizzle != SWIZZLE_XYZW) {
1087 break;
1088 }
1089 }
1090
1091 variable_storage *storage;
1092 st_dst_reg dst;
1093 if (i == ir->get_num_state_slots()) {
1094 /* We'll set the index later. */
1095 storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
1096
1097 _mesa_hash_table_insert(this->variables, ir, storage);
1098
1099 dst = undef_dst;
1100 } else {
1101 /* The variable_storage constructor allocates slots based on the size
1102 * of the type. However, this had better match the number of state
1103 * elements that we're going to copy into the new temporary.
1104 */
1105 assert((int) ir->get_num_state_slots() == type_size(ir->type));
1106
1107 dst = st_dst_reg(get_temp(ir->type));
1108
1109 storage = new(mem_ctx) variable_storage(ir, dst.file, dst.index,
1110 dst.array_id);
1111
1112 _mesa_hash_table_insert(this->variables, ir, storage);
1113 }
1114
1115
1116 for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) {
1117 int index = _mesa_add_state_reference(this->prog->Parameters,
1118 (gl_state_index *)slots[i].tokens);
1119
1120 if (storage->file == PROGRAM_STATE_VAR) {
1121 if (storage->index == -1) {
1122 storage->index = index;
1123 } else {
1124 assert(index == storage->index + (int)i);
1125 }
1126 } else {
1127 /* We use GLSL_TYPE_FLOAT here regardless of the actual type of
1128 * the data being moved since MOV does not care about the type of
1129 * data it is moving, and we don't want to declare registers with
1130 * array or struct types.
1131 */
1132 st_src_reg src(PROGRAM_STATE_VAR, index, GLSL_TYPE_FLOAT);
1133 src.swizzle = slots[i].swizzle;
1134 emit_asm(ir, TGSI_OPCODE_MOV, dst, src);
1135 /* even a float takes up a whole vec4 reg in a struct/array. */
1136 dst.index++;
1137 }
1138 }
1139
1140 if (storage->file == PROGRAM_TEMPORARY &&
1141 dst.index != storage->index + (int) ir->get_num_state_slots()) {
1142 fail_link(this->shader_program,
1143 "failed to load builtin uniform `%s' (%d/%d regs loaded)\n",
1144 ir->name, dst.index - storage->index,
1145 type_size(ir->type));
1146 }
1147 }
1148 }
1149
1150 void
1151 glsl_to_tgsi_visitor::visit(ir_loop *ir)
1152 {
1153 emit_asm(NULL, TGSI_OPCODE_BGNLOOP);
1154
1155 visit_exec_list(&ir->body_instructions, this);
1156
1157 emit_asm(NULL, TGSI_OPCODE_ENDLOOP);
1158 }
1159
1160 void
1161 glsl_to_tgsi_visitor::visit(ir_loop_jump *ir)
1162 {
1163 switch (ir->mode) {
1164 case ir_loop_jump::jump_break:
1165 emit_asm(NULL, TGSI_OPCODE_BRK);
1166 break;
1167 case ir_loop_jump::jump_continue:
1168 emit_asm(NULL, TGSI_OPCODE_CONT);
1169 break;
1170 }
1171 }
1172
1173
1174 void
1175 glsl_to_tgsi_visitor::visit(ir_function_signature *ir)
1176 {
1177 assert(0);
1178 (void)ir;
1179 }
1180
1181 void
1182 glsl_to_tgsi_visitor::visit(ir_function *ir)
1183 {
1184 /* Ignore function bodies other than main() -- we shouldn't see calls to
1185 * them since they should all be inlined before we get to glsl_to_tgsi.
1186 */
1187 if (strcmp(ir->name, "main") == 0) {
1188 const ir_function_signature *sig;
1189 exec_list empty;
1190
1191 sig = ir->matching_signature(NULL, &empty, false);
1192
1193 assert(sig);
1194
1195 foreach_in_list(ir_instruction, ir, &sig->body) {
1196 ir->accept(this);
1197 }
1198 }
1199 }
1200
1201 bool
1202 glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
1203 {
1204 int nonmul_operand = 1 - mul_operand;
1205 st_src_reg a, b, c;
1206 st_dst_reg result_dst;
1207
1208 ir_expression *expr = ir->operands[mul_operand]->as_expression();
1209 if (!expr || expr->operation != ir_binop_mul)
1210 return false;
1211
1212 expr->operands[0]->accept(this);
1213 a = this->result;
1214 expr->operands[1]->accept(this);
1215 b = this->result;
1216 ir->operands[nonmul_operand]->accept(this);
1217 c = this->result;
1218
1219 this->result = get_temp(ir->type);
1220 result_dst = st_dst_reg(this->result);
1221 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1222 emit_asm(ir, TGSI_OPCODE_MAD, result_dst, a, b, c);
1223
1224 return true;
1225 }
1226
1227 /**
1228 * Emit MAD(a, -b, a) instead of AND(a, NOT(b))
1229 *
1230 * The logic values are 1.0 for true and 0.0 for false. Logical-and is
1231 * implemented using multiplication, and logical-or is implemented using
1232 * addition. Logical-not can be implemented as (true - x), or (1.0 - x).
1233 * As result, the logical expression (a & !b) can be rewritten as:
1234 *
1235 * - a * !b
1236 * - a * (1 - b)
1237 * - (a * 1) - (a * b)
1238 * - a + -(a * b)
1239 * - a + (a * -b)
1240 *
1241 * This final expression can be implemented as a single MAD(a, -b, a)
1242 * instruction.
1243 */
1244 bool
1245 glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
1246 {
1247 const int other_operand = 1 - try_operand;
1248 st_src_reg a, b;
1249
1250 ir_expression *expr = ir->operands[try_operand]->as_expression();
1251 if (!expr || expr->operation != ir_unop_logic_not)
1252 return false;
1253
1254 ir->operands[other_operand]->accept(this);
1255 a = this->result;
1256 expr->operands[0]->accept(this);
1257 b = this->result;
1258
1259 b.negate = ~b.negate;
1260
1261 this->result = get_temp(ir->type);
1262 emit_asm(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a);
1263
1264 return true;
1265 }
1266
1267 void
1268 glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
1269 st_src_reg *reg, int *num_reladdr)
1270 {
1271 if (!reg->reladdr && !reg->reladdr2)
1272 return;
1273
1274 if (reg->reladdr) emit_arl(ir, address_reg, *reg->reladdr);
1275 if (reg->reladdr2) emit_arl(ir, address_reg2, *reg->reladdr2);
1276
1277 if (*num_reladdr != 1) {
1278 st_src_reg temp = get_temp(glsl_type::get_instance(reg->type, 4, 1));
1279
1280 emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg);
1281 *reg = temp;
1282 }
1283
1284 (*num_reladdr)--;
1285 }
1286
1287 void
1288 glsl_to_tgsi_visitor::visit(ir_expression *ir)
1289 {
1290 st_src_reg op[ARRAY_SIZE(ir->operands)];
1291
1292 /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c)
1293 */
1294 if (!this->precise && ir->operation == ir_binop_add) {
1295 if (try_emit_mad(ir, 1))
1296 return;
1297 if (try_emit_mad(ir, 0))
1298 return;
1299 }
1300
1301 /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b))
1302 */
1303 if (!native_integers && ir->operation == ir_binop_logic_and) {
1304 if (try_emit_mad_for_and_not(ir, 1))
1305 return;
1306 if (try_emit_mad_for_and_not(ir, 0))
1307 return;
1308 }
1309
1310 if (ir->operation == ir_quadop_vector)
1311 assert(!"ir_quadop_vector should have been lowered");
1312
1313 for (unsigned int operand = 0; operand < ir->num_operands; operand++) {
1314 this->result.file = PROGRAM_UNDEFINED;
1315 ir->operands[operand]->accept(this);
1316 if (this->result.file == PROGRAM_UNDEFINED) {
1317 printf("Failed to get tree for expression operand:\n");
1318 ir->operands[operand]->print();
1319 printf("\n");
1320 exit(1);
1321 }
1322 op[operand] = this->result;
1323
1324 /* Matrix expression operands should have been broken down to vector
1325 * operations already.
1326 */
1327 assert(!ir->operands[operand]->type->is_matrix());
1328 }
1329
1330 visit_expression(ir, op);
1331 }
1332
1333 /* The non-recursive part of the expression visitor lives in a separate
1334 * function and should be prevented from being inlined, to avoid a stack
1335 * explosion when deeply nested expressions are visited.
1336 */
1337 void
1338 glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
1339 {
1340 st_src_reg result_src;
1341 st_dst_reg result_dst;
1342
1343 int vector_elements = ir->operands[0]->type->vector_elements;
1344 if (ir->operands[1]) {
1345 vector_elements = MAX2(vector_elements,
1346 ir->operands[1]->type->vector_elements);
1347 }
1348
1349 this->result.file = PROGRAM_UNDEFINED;
1350
1351 /* Storage for our result. Ideally for an assignment we'd be using
1352 * the actual storage for the result here, instead.
1353 */
1354 result_src = get_temp(ir->type);
1355 /* convenience for the emit functions below. */
1356 result_dst = st_dst_reg(result_src);
1357 /* Limit writes to the channels that will be used by result_src later.
1358 * This does limit this temp's use as a temporary for multi-instruction
1359 * sequences.
1360 */
1361 result_dst.writemask = (1 << ir->type->vector_elements) - 1;
1362
1363 switch (ir->operation) {
1364 case ir_unop_logic_not:
1365 if (result_dst.type != GLSL_TYPE_FLOAT)
1366 emit_asm(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
1367 else {
1368 /* Previously 'SEQ dst, src, 0.0' was used for this. However, many
1369 * older GPUs implement SEQ using multiple instructions (i915 uses two
1370 * SGE instructions and a MUL instruction). Since our logic values are
1371 * 0.0 and 1.0, 1-x also implements !x.
1372 */
1373 op[0].negate = ~op[0].negate;
1374 emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0));
1375 }
1376 break;
1377 case ir_unop_neg:
1378 if (result_dst.type == GLSL_TYPE_INT64 || result_dst.type == GLSL_TYPE_UINT64)
1379 emit_asm(ir, TGSI_OPCODE_I64NEG, result_dst, op[0]);
1380 else if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT)
1381 emit_asm(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
1382 else if (result_dst.type == GLSL_TYPE_DOUBLE)
1383 emit_asm(ir, TGSI_OPCODE_DNEG, result_dst, op[0]);
1384 else {
1385 op[0].negate = ~op[0].negate;
1386 result_src = op[0];
1387 }
1388 break;
1389 case ir_unop_subroutine_to_int:
1390 emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
1391 break;
1392 case ir_unop_abs:
1393 if (result_dst.type == GLSL_TYPE_FLOAT)
1394 emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0].get_abs());
1395 else if (result_dst.type == GLSL_TYPE_DOUBLE)
1396 emit_asm(ir, TGSI_OPCODE_DABS, result_dst, op[0]);
1397 else if (result_dst.type == GLSL_TYPE_INT64 || result_dst.type == GLSL_TYPE_UINT64)
1398 emit_asm(ir, TGSI_OPCODE_I64ABS, result_dst, op[0]);
1399 else
1400 emit_asm(ir, TGSI_OPCODE_IABS, result_dst, op[0]);
1401 break;
1402 case ir_unop_sign:
1403 emit_asm(ir, TGSI_OPCODE_SSG, result_dst, op[0]);
1404 break;
1405 case ir_unop_rcp:
1406 emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]);
1407 break;
1408
1409 case ir_unop_exp2:
1410 emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]);
1411 break;
1412 case ir_unop_exp:
1413 assert(!"not reached: should be handled by exp_to_exp2");
1414 break;
1415 case ir_unop_log:
1416 assert(!"not reached: should be handled by log_to_log2");
1417 break;
1418 case ir_unop_log2:
1419 emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]);
1420 break;
1421 case ir_unop_sin:
1422 emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
1423 break;
1424 case ir_unop_cos:
1425 emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]);
1426 break;
1427 case ir_unop_saturate: {
1428 glsl_to_tgsi_instruction *inst;
1429 inst = emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
1430 inst->saturate = true;
1431 break;
1432 }
1433
1434 case ir_unop_dFdx:
1435 case ir_unop_dFdx_coarse:
1436 emit_asm(ir, TGSI_OPCODE_DDX, result_dst, op[0]);
1437 break;
1438 case ir_unop_dFdx_fine:
1439 emit_asm(ir, TGSI_OPCODE_DDX_FINE, result_dst, op[0]);
1440 break;
1441 case ir_unop_dFdy:
1442 case ir_unop_dFdy_coarse:
1443 case ir_unop_dFdy_fine:
1444 {
1445 /* The X component contains 1 or -1 depending on whether the framebuffer
1446 * is a FBO or the window system buffer, respectively.
1447 * It is then multiplied with the source operand of DDY.
1448 */
1449 static const gl_state_index transform_y_state[STATE_LENGTH]
1450 = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM };
1451
1452 unsigned transform_y_index =
1453 _mesa_add_state_reference(this->prog->Parameters,
1454 transform_y_state);
1455
1456 st_src_reg transform_y = st_src_reg(PROGRAM_STATE_VAR,
1457 transform_y_index,
1458 glsl_type::vec4_type);
1459 transform_y.swizzle = SWIZZLE_XXXX;
1460
1461 st_src_reg temp = get_temp(glsl_type::vec4_type);
1462
1463 emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(temp), transform_y, op[0]);
1464 emit_asm(ir, ir->operation == ir_unop_dFdy_fine ?
1465 TGSI_OPCODE_DDY_FINE : TGSI_OPCODE_DDY, result_dst, temp);
1466 break;
1467 }
1468
1469 case ir_unop_frexp_sig:
1470 emit_asm(ir, TGSI_OPCODE_DFRACEXP, result_dst, undef_dst, op[0]);
1471 break;
1472
1473 case ir_unop_frexp_exp:
1474 emit_asm(ir, TGSI_OPCODE_DFRACEXP, undef_dst, result_dst, op[0]);
1475 break;
1476
1477 case ir_unop_noise: {
1478 /* At some point, a motivated person could add a better
1479 * implementation of noise. Currently not even the nvidia
1480 * binary drivers do anything more than this. In any case, the
1481 * place to do this is in the GL state tracker, not the poor
1482 * driver.
1483 */
1484 emit_asm(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5));
1485 break;
1486 }
1487
1488 case ir_binop_add:
1489 emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
1490 break;
1491 case ir_binop_sub:
1492 op[1].negate = ~op[1].negate;
1493 emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
1494 break;
1495
1496 case ir_binop_mul:
1497 emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
1498 break;
1499 case ir_binop_div:
1500 emit_asm(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
1501 break;
1502 case ir_binop_mod:
1503 if (result_dst.type == GLSL_TYPE_FLOAT)
1504 assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
1505 else
1506 emit_asm(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]);
1507 break;
1508
1509 case ir_binop_less:
1510 emit_asm(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
1511 break;
1512 case ir_binop_gequal:
1513 emit_asm(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
1514 break;
1515 case ir_binop_equal:
1516 emit_asm(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
1517 break;
1518 case ir_binop_nequal:
1519 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1520 break;
1521 case ir_binop_all_equal:
1522 /* "==" operator producing a scalar boolean. */
1523 if (ir->operands[0]->type->is_vector() ||
1524 ir->operands[1]->type->is_vector()) {
1525 st_src_reg temp = get_temp(native_integers ?
1526 glsl_type::uvec4_type :
1527 glsl_type::vec4_type);
1528
1529 if (native_integers) {
1530 st_dst_reg temp_dst = st_dst_reg(temp);
1531 st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
1532
1533 if (ir->operands[0]->type->is_boolean() &&
1534 ir->operands[1]->as_constant() &&
1535 ir->operands[1]->as_constant()->is_one()) {
1536 emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), op[0]);
1537 } else {
1538 emit_asm(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]);
1539 }
1540
1541 /* Emit 1-3 AND operations to combine the SEQ results. */
1542 switch (ir->operands[0]->type->vector_elements) {
1543 case 2:
1544 break;
1545 case 3:
1546 temp_dst.writemask = WRITEMASK_Y;
1547 temp1.swizzle = SWIZZLE_YYYY;
1548 temp2.swizzle = SWIZZLE_ZZZZ;
1549 emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
1550 break;
1551 case 4:
1552 temp_dst.writemask = WRITEMASK_X;
1553 temp1.swizzle = SWIZZLE_XXXX;
1554 temp2.swizzle = SWIZZLE_YYYY;
1555 emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
1556 temp_dst.writemask = WRITEMASK_Y;
1557 temp1.swizzle = SWIZZLE_ZZZZ;
1558 temp2.swizzle = SWIZZLE_WWWW;
1559 emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
1560 }
1561
1562 temp1.swizzle = SWIZZLE_XXXX;
1563 temp2.swizzle = SWIZZLE_YYYY;
1564 emit_asm(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2);
1565 } else {
1566 emit_asm(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
1567
1568 /* After the dot-product, the value will be an integer on the
1569 * range [0,4]. Zero becomes 1.0, and positive values become zero.
1570 */
1571 emit_dp(ir, result_dst, temp, temp, vector_elements);
1572
1573 /* Negating the result of the dot-product gives values on the range
1574 * [-4, 0]. Zero becomes 1.0, and negative values become zero.
1575 * This is achieved using SGE.
1576 */
1577 st_src_reg sge_src = result_src;
1578 sge_src.negate = ~sge_src.negate;
1579 emit_asm(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0));
1580 }
1581 } else {
1582 emit_asm(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
1583 }
1584 break;
1585 case ir_binop_any_nequal:
1586 /* "!=" operator producing a scalar boolean. */
1587 if (ir->operands[0]->type->is_vector() ||
1588 ir->operands[1]->type->is_vector()) {
1589 st_src_reg temp = get_temp(native_integers ?
1590 glsl_type::uvec4_type :
1591 glsl_type::vec4_type);
1592 if (ir->operands[0]->type->is_boolean() &&
1593 ir->operands[1]->as_constant() &&
1594 ir->operands[1]->as_constant()->is_zero()) {
1595 emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), op[0]);
1596 } else {
1597 emit_asm(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
1598 }
1599
1600 if (native_integers) {
1601 st_dst_reg temp_dst = st_dst_reg(temp);
1602 st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
1603
1604 /* Emit 1-3 OR operations to combine the SNE results. */
1605 switch (ir->operands[0]->type->vector_elements) {
1606 case 2:
1607 break;
1608 case 3:
1609 temp_dst.writemask = WRITEMASK_Y;
1610 temp1.swizzle = SWIZZLE_YYYY;
1611 temp2.swizzle = SWIZZLE_ZZZZ;
1612 emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
1613 break;
1614 case 4:
1615 temp_dst.writemask = WRITEMASK_X;
1616 temp1.swizzle = SWIZZLE_XXXX;
1617 temp2.swizzle = SWIZZLE_YYYY;
1618 emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
1619 temp_dst.writemask = WRITEMASK_Y;
1620 temp1.swizzle = SWIZZLE_ZZZZ;
1621 temp2.swizzle = SWIZZLE_WWWW;
1622 emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
1623 }
1624
1625 temp1.swizzle = SWIZZLE_XXXX;
1626 temp2.swizzle = SWIZZLE_YYYY;
1627 emit_asm(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2);
1628 } else {
1629 /* After the dot-product, the value will be an integer on the
1630 * range [0,4]. Zero stays zero, and positive values become 1.0.
1631 */
1632 glsl_to_tgsi_instruction *const dp =
1633 emit_dp(ir, result_dst, temp, temp, vector_elements);
1634 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
1635 /* The clamping to [0,1] can be done for free in the fragment
1636 * shader with a saturate.
1637 */
1638 dp->saturate = true;
1639 } else {
1640 /* Negating the result of the dot-product gives values on the range
1641 * [-4, 0]. Zero stays zero, and negative values become 1.0. This
1642 * achieved using SLT.
1643 */
1644 st_src_reg slt_src = result_src;
1645 slt_src.negate = ~slt_src.negate;
1646 emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
1647 }
1648 }
1649 } else {
1650 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1651 }
1652 break;
1653
1654 case ir_binop_logic_xor:
1655 if (native_integers)
1656 emit_asm(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
1657 else
1658 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
1659 break;
1660
1661 case ir_binop_logic_or: {
1662 if (native_integers) {
1663 /* If integers are used as booleans, we can use an actual "or"
1664 * instruction.
1665 */
1666 assert(native_integers);
1667 emit_asm(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
1668 } else {
1669 /* After the addition, the value will be an integer on the
1670 * range [0,2]. Zero stays zero, and positive values become 1.0.
1671 */
1672 glsl_to_tgsi_instruction *add =
1673 emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
1674 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
1675 /* The clamping to [0,1] can be done for free in the fragment
1676 * shader with a saturate if floats are being used as boolean values.
1677 */
1678 add->saturate = true;
1679 } else {
1680 /* Negating the result of the addition gives values on the range
1681 * [-2, 0]. Zero stays zero, and negative values become 1.0. This
1682 * is achieved using SLT.
1683 */
1684 st_src_reg slt_src = result_src;
1685 slt_src.negate = ~slt_src.negate;
1686 emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
1687 }
1688 }
1689 break;
1690 }
1691
1692 case ir_binop_logic_and:
1693 /* If native integers are disabled, the bool args are stored as float 0.0
1694 * or 1.0, so "mul" gives us "and". If they're enabled, just use the
1695 * actual AND opcode.
1696 */
1697 if (native_integers)
1698 emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
1699 else
1700 emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
1701 break;
1702
1703 case ir_binop_dot:
1704 assert(ir->operands[0]->type->is_vector());
1705 assert(ir->operands[0]->type == ir->operands[1]->type);
1706 emit_dp(ir, result_dst, op[0], op[1],
1707 ir->operands[0]->type->vector_elements);
1708 break;
1709
1710 case ir_unop_sqrt:
1711 if (have_sqrt) {
1712 emit_scalar(ir, TGSI_OPCODE_SQRT, result_dst, op[0]);
1713 } else {
1714 /* This is the only instruction sequence that makes the game "Risen"
1715 * render correctly. ABS is not required for the game, but since GLSL
1716 * declares negative values as "undefined", allowing us to do whatever
1717 * we want, I choose to use ABS to match DX9 and pre-GLSL RSQ
1718 * behavior.
1719 */
1720 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0].get_abs());
1721 emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, result_src);
1722 }
1723 break;
1724 case ir_unop_rsq:
1725 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
1726 break;
1727 case ir_unop_i2f:
1728 if (native_integers) {
1729 emit_asm(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
1730 break;
1731 }
1732 /* fallthrough to next case otherwise */
1733 case ir_unop_b2f:
1734 if (native_integers) {
1735 emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0));
1736 break;
1737 }
1738 /* fallthrough to next case otherwise */
1739 case ir_unop_i2u:
1740 case ir_unop_u2i:
1741 case ir_unop_i642u64:
1742 case ir_unop_u642i64:
1743 /* Converting between signed and unsigned integers is a no-op. */
1744 result_src = op[0];
1745 result_src.type = result_dst.type;
1746 break;
1747 case ir_unop_b2i:
1748 if (native_integers) {
1749 /* Booleans are stored as integers using ~0 for true and 0 for false.
1750 * GLSL requires that int(bool) return 1 for true and 0 for false.
1751 * This conversion is done with AND, but it could be done with NEG.
1752 */
1753 emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1));
1754 } else {
1755 /* Booleans and integers are both stored as floats when native
1756 * integers are disabled.
1757 */
1758 result_src = op[0];
1759 }
1760 break;
1761 case ir_unop_f2i:
1762 if (native_integers)
1763 emit_asm(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
1764 else
1765 emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
1766 break;
1767 case ir_unop_f2u:
1768 if (native_integers)
1769 emit_asm(ir, TGSI_OPCODE_F2U, result_dst, op[0]);
1770 else
1771 emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
1772 break;
1773 case ir_unop_bitcast_f2i:
1774 case ir_unop_bitcast_f2u:
1775 /* Make sure we don't propagate the negate modifier to integer opcodes. */
1776 if (op[0].negate || op[0].abs)
1777 emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
1778 else
1779 result_src = op[0];
1780 result_src.type = ir->operation == ir_unop_bitcast_f2i ? GLSL_TYPE_INT :
1781 GLSL_TYPE_UINT;
1782 break;
1783 case ir_unop_bitcast_i2f:
1784 case ir_unop_bitcast_u2f:
1785 result_src = op[0];
1786 result_src.type = GLSL_TYPE_FLOAT;
1787 break;
1788 case ir_unop_f2b:
1789 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
1790 break;
1791 case ir_unop_d2b:
1792 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_double(0.0));
1793 break;
1794 case ir_unop_i2b:
1795 if (native_integers)
1796 emit_asm(ir, TGSI_OPCODE_USNE, result_dst, op[0], st_src_reg_for_int(0));
1797 else
1798 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
1799 break;
1800 case ir_unop_bitcast_u642d:
1801 case ir_unop_bitcast_i642d:
1802 result_src = op[0];
1803 result_src.type = GLSL_TYPE_DOUBLE;
1804 break;
1805 case ir_unop_bitcast_d2i64:
1806 result_src = op[0];
1807 result_src.type = GLSL_TYPE_INT64;
1808 break;
1809 case ir_unop_bitcast_d2u64:
1810 result_src = op[0];
1811 result_src.type = GLSL_TYPE_UINT64;
1812 break;
1813 case ir_unop_trunc:
1814 emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
1815 break;
1816 case ir_unop_ceil:
1817 emit_asm(ir, TGSI_OPCODE_CEIL, result_dst, op[0]);
1818 break;
1819 case ir_unop_floor:
1820 emit_asm(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
1821 break;
1822 case ir_unop_round_even:
1823 emit_asm(ir, TGSI_OPCODE_ROUND, result_dst, op[0]);
1824 break;
1825 case ir_unop_fract:
1826 emit_asm(ir, TGSI_OPCODE_FRC, result_dst, op[0]);
1827 break;
1828
1829 case ir_binop_min:
1830 emit_asm(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]);
1831 break;
1832 case ir_binop_max:
1833 emit_asm(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]);
1834 break;
1835 case ir_binop_pow:
1836 emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]);
1837 break;
1838
1839 case ir_unop_bit_not:
1840 if (native_integers) {
1841 emit_asm(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
1842 break;
1843 }
1844 case ir_unop_u2f:
1845 if (native_integers) {
1846 emit_asm(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
1847 break;
1848 }
1849 case ir_binop_lshift:
1850 case ir_binop_rshift:
1851 if (native_integers) {
1852 unsigned opcode = ir->operation == ir_binop_lshift ? TGSI_OPCODE_SHL
1853 : TGSI_OPCODE_ISHR;
1854 st_src_reg count;
1855
1856 if (glsl_base_type_is_64bit(op[0].type)) {
1857 /* GLSL shift operations have 32-bit shift counts, but TGSI uses
1858 * 64 bits.
1859 */
1860 count = get_temp(glsl_type::u64vec(ir->operands[1]->type->components()));
1861 emit_asm(ir, TGSI_OPCODE_U2I64, st_dst_reg(count), op[1]);
1862 } else {
1863 count = op[1];
1864 }
1865
1866 emit_asm(ir, opcode, result_dst, op[0], count);
1867 break;
1868 }
1869 case ir_binop_bit_and:
1870 if (native_integers) {
1871 emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
1872 break;
1873 }
1874 case ir_binop_bit_xor:
1875 if (native_integers) {
1876 emit_asm(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
1877 break;
1878 }
1879 case ir_binop_bit_or:
1880 if (native_integers) {
1881 emit_asm(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
1882 break;
1883 }
1884
1885 assert(!"GLSL 1.30 features unsupported");
1886 break;
1887
1888 case ir_binop_ubo_load: {
1889 if (ctx->Const.UseSTD430AsDefaultPacking) {
1890 ir_rvalue *block = ir->operands[0];
1891 ir_rvalue *offset = ir->operands[1];
1892 ir_constant *const_block = block->as_constant();
1893
1894 st_src_reg cbuf(PROGRAM_CONSTANT,
1895 (const_block ? const_block->value.u[0] + 1 : 1),
1896 ir->type->base_type);
1897
1898 cbuf.has_index2 = true;
1899
1900 if (!const_block) {
1901 block->accept(this);
1902 cbuf.reladdr = ralloc(mem_ctx, st_src_reg);
1903 *cbuf.reladdr = this->result;
1904 emit_arl(ir, sampler_reladdr, this->result);
1905 }
1906
1907 /* Calculate the surface offset */
1908 offset->accept(this);
1909 st_src_reg off = this->result;
1910
1911 glsl_to_tgsi_instruction *inst =
1912 emit_asm(ir, TGSI_OPCODE_LOAD, result_dst, off);
1913
1914 if (result_dst.type == GLSL_TYPE_BOOL)
1915 emit_asm(ir, TGSI_OPCODE_USNE, result_dst, st_src_reg(result_dst),
1916 st_src_reg_for_int(0));
1917
1918 add_buffer_to_load_and_stores(inst, &cbuf, &this->instructions,
1919 NULL);
1920 } else {
1921 ir_constant *const_uniform_block = ir->operands[0]->as_constant();
1922 ir_constant *const_offset_ir = ir->operands[1]->as_constant();
1923 unsigned const_offset = const_offset_ir ?
1924 const_offset_ir->value.u[0] : 0;
1925 unsigned const_block = const_uniform_block ?
1926 const_uniform_block->value.u[0] + 1 : 1;
1927 st_src_reg index_reg = get_temp(glsl_type::uint_type);
1928 st_src_reg cbuf;
1929
1930 cbuf.type = ir->type->base_type;
1931 cbuf.file = PROGRAM_CONSTANT;
1932 cbuf.index = 0;
1933 cbuf.reladdr = NULL;
1934 cbuf.negate = 0;
1935 cbuf.abs = 0;
1936 cbuf.index2D = const_block;
1937
1938 assert(ir->type->is_vector() || ir->type->is_scalar());
1939
1940 if (const_offset_ir) {
1941 /* Constant index into constant buffer */
1942 cbuf.reladdr = NULL;
1943 cbuf.index = const_offset / 16;
1944 } else {
1945 ir_expression *offset_expr = ir->operands[1]->as_expression();
1946 st_src_reg offset = op[1];
1947
1948 /* The OpenGL spec is written in such a way that accesses with
1949 * non-constant offset are almost always vec4-aligned. The only
1950 * exception to this are members of structs in arrays of structs:
1951 * each struct in an array of structs is at least vec4-aligned,
1952 * but single-element and [ui]vec2 members of the struct may be at
1953 * an offset that is not a multiple of 16 bytes.
1954 *
1955 * Here, we extract that offset, relying on previous passes to
1956 * always generate offset expressions of the form
1957 * (+ expr constant_offset).
1958 *
1959 * Note that the std430 layout, which allows more cases of
1960 * alignment less than vec4 in arrays, is not supported for
1961 * uniform blocks, so we do not have to deal with it here.
1962 */
1963 if (offset_expr && offset_expr->operation == ir_binop_add) {
1964 const_offset_ir = offset_expr->operands[1]->as_constant();
1965 if (const_offset_ir) {
1966 const_offset = const_offset_ir->value.u[0];
1967 cbuf.index = const_offset / 16;
1968 offset_expr->operands[0]->accept(this);
1969 offset = this->result;
1970 }
1971 }
1972
1973 /* Relative/variable index into constant buffer */
1974 emit_asm(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), offset,
1975 st_src_reg_for_int(4));
1976 cbuf.reladdr = ralloc(mem_ctx, st_src_reg);
1977 memcpy(cbuf.reladdr, &index_reg, sizeof(index_reg));
1978 }
1979
1980 if (const_uniform_block) {
1981 /* Constant constant buffer */
1982 cbuf.reladdr2 = NULL;
1983 } else {
1984 /* Relative/variable constant buffer */
1985 cbuf.reladdr2 = ralloc(mem_ctx, st_src_reg);
1986 memcpy(cbuf.reladdr2, &op[0], sizeof(st_src_reg));
1987 }
1988 cbuf.has_index2 = true;
1989
1990 cbuf.swizzle = swizzle_for_size(ir->type->vector_elements);
1991 if (glsl_base_type_is_64bit(cbuf.type))
1992 cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 8,
1993 const_offset % 16 / 8,
1994 const_offset % 16 / 8,
1995 const_offset % 16 / 8);
1996 else
1997 cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 4,
1998 const_offset % 16 / 4,
1999 const_offset % 16 / 4,
2000 const_offset % 16 / 4);
2001
2002 if (ir->type->is_boolean()) {
2003 emit_asm(ir, TGSI_OPCODE_USNE, result_dst, cbuf,
2004 st_src_reg_for_int(0));
2005 } else {
2006 emit_asm(ir, TGSI_OPCODE_MOV, result_dst, cbuf);
2007 }
2008 }
2009 break;
2010 }
2011 case ir_triop_lrp:
2012 /* note: we have to reorder the three args here */
2013 emit_asm(ir, TGSI_OPCODE_LRP, result_dst, op[2], op[1], op[0]);
2014 break;
2015 case ir_triop_csel:
2016 if (this->ctx->Const.NativeIntegers)
2017 emit_asm(ir, TGSI_OPCODE_UCMP, result_dst, op[0], op[1], op[2]);
2018 else {
2019 op[0].negate = ~op[0].negate;
2020 emit_asm(ir, TGSI_OPCODE_CMP, result_dst, op[0], op[1], op[2]);
2021 }
2022 break;
2023 case ir_triop_bitfield_extract:
2024 emit_asm(ir, TGSI_OPCODE_IBFE, result_dst, op[0], op[1], op[2]);
2025 break;
2026 case ir_quadop_bitfield_insert:
2027 emit_asm(ir, TGSI_OPCODE_BFI, result_dst, op[0], op[1], op[2], op[3]);
2028 break;
2029 case ir_unop_bitfield_reverse:
2030 emit_asm(ir, TGSI_OPCODE_BREV, result_dst, op[0]);
2031 break;
2032 case ir_unop_bit_count:
2033 emit_asm(ir, TGSI_OPCODE_POPC, result_dst, op[0]);
2034 break;
2035 case ir_unop_find_msb:
2036 emit_asm(ir, TGSI_OPCODE_IMSB, result_dst, op[0]);
2037 break;
2038 case ir_unop_find_lsb:
2039 emit_asm(ir, TGSI_OPCODE_LSB, result_dst, op[0]);
2040 break;
2041 case ir_binop_imul_high:
2042 emit_asm(ir, TGSI_OPCODE_IMUL_HI, result_dst, op[0], op[1]);
2043 break;
2044 case ir_triop_fma:
2045 /* In theory, MAD is incorrect here. */
2046 if (have_fma)
2047 emit_asm(ir, TGSI_OPCODE_FMA, result_dst, op[0], op[1], op[2]);
2048 else
2049 emit_asm(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]);
2050 break;
2051 case ir_unop_interpolate_at_centroid:
2052 emit_asm(ir, TGSI_OPCODE_INTERP_CENTROID, result_dst, op[0]);
2053 break;
2054 case ir_binop_interpolate_at_offset: {
2055 /* The y coordinate needs to be flipped for the default fb */
2056 static const gl_state_index transform_y_state[STATE_LENGTH]
2057 = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM };
2058
2059 unsigned transform_y_index =
2060 _mesa_add_state_reference(this->prog->Parameters,
2061 transform_y_state);
2062
2063 st_src_reg transform_y = st_src_reg(PROGRAM_STATE_VAR,
2064 transform_y_index,
2065 glsl_type::vec4_type);
2066 transform_y.swizzle = SWIZZLE_XXXX;
2067
2068 st_src_reg temp = get_temp(glsl_type::vec2_type);
2069 st_dst_reg temp_dst = st_dst_reg(temp);
2070
2071 emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[1]);
2072 temp_dst.writemask = WRITEMASK_Y;
2073 emit_asm(ir, TGSI_OPCODE_MUL, temp_dst, transform_y, op[1]);
2074 emit_asm(ir, TGSI_OPCODE_INTERP_OFFSET, result_dst, op[0], temp);
2075 break;
2076 }
2077 case ir_binop_interpolate_at_sample:
2078 emit_asm(ir, TGSI_OPCODE_INTERP_SAMPLE, result_dst, op[0], op[1]);
2079 break;
2080
2081 case ir_unop_d2f:
2082 emit_asm(ir, TGSI_OPCODE_D2F, result_dst, op[0]);
2083 break;
2084 case ir_unop_f2d:
2085 emit_asm(ir, TGSI_OPCODE_F2D, result_dst, op[0]);
2086 break;
2087 case ir_unop_d2i:
2088 emit_asm(ir, TGSI_OPCODE_D2I, result_dst, op[0]);
2089 break;
2090 case ir_unop_i2d:
2091 emit_asm(ir, TGSI_OPCODE_I2D, result_dst, op[0]);
2092 break;
2093 case ir_unop_d2u:
2094 emit_asm(ir, TGSI_OPCODE_D2U, result_dst, op[0]);
2095 break;
2096 case ir_unop_u2d:
2097 emit_asm(ir, TGSI_OPCODE_U2D, result_dst, op[0]);
2098 break;
2099 case ir_unop_unpack_double_2x32:
2100 case ir_unop_pack_double_2x32:
2101 case ir_unop_unpack_int_2x32:
2102 case ir_unop_pack_int_2x32:
2103 case ir_unop_unpack_uint_2x32:
2104 case ir_unop_pack_uint_2x32:
2105 case ir_unop_unpack_sampler_2x32:
2106 case ir_unop_pack_sampler_2x32:
2107 case ir_unop_unpack_image_2x32:
2108 case ir_unop_pack_image_2x32:
2109 emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
2110 break;
2111
2112 case ir_binop_ldexp:
2113 if (ir->operands[0]->type->is_double()) {
2114 emit_asm(ir, TGSI_OPCODE_DLDEXP, result_dst, op[0], op[1]);
2115 } else if (ir->operands[0]->type->is_float()) {
2116 emit_asm(ir, TGSI_OPCODE_LDEXP, result_dst, op[0], op[1]);
2117 } else {
2118 assert(!"Invalid ldexp for non-double opcode in glsl_to_tgsi_visitor::visit()");
2119 }
2120 break;
2121
2122 case ir_unop_pack_half_2x16:
2123 emit_asm(ir, TGSI_OPCODE_PK2H, result_dst, op[0]);
2124 break;
2125 case ir_unop_unpack_half_2x16:
2126 emit_asm(ir, TGSI_OPCODE_UP2H, result_dst, op[0]);
2127 break;
2128
2129 case ir_unop_get_buffer_size: {
2130 ir_constant *const_offset = ir->operands[0]->as_constant();
2131 int buf_base = ctx->st->has_hw_atomics ? 0 : ctx->Const.Program[shader->Stage].MaxAtomicBuffers;
2132 st_src_reg buffer(
2133 PROGRAM_BUFFER,
2134 buf_base + (const_offset ? const_offset->value.u[0] : 0),
2135 GLSL_TYPE_UINT);
2136 if (!const_offset) {
2137 buffer.reladdr = ralloc(mem_ctx, st_src_reg);
2138 *buffer.reladdr = op[0];
2139 emit_arl(ir, sampler_reladdr, op[0]);
2140 }
2141 emit_asm(ir, TGSI_OPCODE_RESQ, result_dst)->resource = buffer;
2142 break;
2143 }
2144
2145 case ir_unop_u2i64:
2146 case ir_unop_u2u64:
2147 case ir_unop_b2i64: {
2148 st_src_reg temp = get_temp(glsl_type::uvec4_type);
2149 st_dst_reg temp_dst = st_dst_reg(temp);
2150 unsigned orig_swz = op[0].swizzle;
2151 /*
2152 * To convert unsigned to 64-bit:
2153 * zero Y channel, copy X channel.
2154 */
2155 temp_dst.writemask = WRITEMASK_Y;
2156 if (vector_elements > 1)
2157 temp_dst.writemask |= WRITEMASK_W;
2158 emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, st_src_reg_for_int(0));
2159 temp_dst.writemask = WRITEMASK_X;
2160 if (vector_elements > 1)
2161 temp_dst.writemask |= WRITEMASK_Z;
2162 op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(orig_swz, 0), GET_SWZ(orig_swz, 0),
2163 GET_SWZ(orig_swz, 1), GET_SWZ(orig_swz, 1));
2164 if (ir->operation == ir_unop_u2i64 || ir->operation == ir_unop_u2u64)
2165 emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]);
2166 else
2167 emit_asm(ir, TGSI_OPCODE_AND, temp_dst, op[0], st_src_reg_for_int(1));
2168 result_src = temp;
2169 result_src.type = GLSL_TYPE_UINT64;
2170 if (vector_elements > 2) {
2171 /* Subtle: We rely on the fact that get_temp here returns the next
2172 * TGSI temporary register directly after the temp register used for
2173 * the first two components, so that the result gets picked up
2174 * automatically.
2175 */
2176 st_src_reg temp = get_temp(glsl_type::uvec4_type);
2177 st_dst_reg temp_dst = st_dst_reg(temp);
2178 temp_dst.writemask = WRITEMASK_Y;
2179 if (vector_elements > 3)
2180 temp_dst.writemask |= WRITEMASK_W;
2181 emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, st_src_reg_for_int(0));
2182
2183 temp_dst.writemask = WRITEMASK_X;
2184 if (vector_elements > 3)
2185 temp_dst.writemask |= WRITEMASK_Z;
2186 op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(orig_swz, 2), GET_SWZ(orig_swz, 2),
2187 GET_SWZ(orig_swz, 3), GET_SWZ(orig_swz, 3));
2188 if (ir->operation == ir_unop_u2i64 || ir->operation == ir_unop_u2u64)
2189 emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]);
2190 else
2191 emit_asm(ir, TGSI_OPCODE_AND, temp_dst, op[0], st_src_reg_for_int(1));
2192 }
2193 break;
2194 }
2195 case ir_unop_i642i:
2196 case ir_unop_u642i:
2197 case ir_unop_u642u:
2198 case ir_unop_i642u: {
2199 st_src_reg temp = get_temp(glsl_type::uvec4_type);
2200 st_dst_reg temp_dst = st_dst_reg(temp);
2201 unsigned orig_swz = op[0].swizzle;
2202 unsigned orig_idx = op[0].index;
2203 int el;
2204 temp_dst.writemask = WRITEMASK_X;
2205
2206 for (el = 0; el < vector_elements; el++) {
2207 unsigned swz = GET_SWZ(orig_swz, el);
2208 if (swz & 1)
2209 op[0].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z);
2210 else
2211 op[0].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
2212 if (swz > 2)
2213 op[0].index = orig_idx + 1;
2214 op[0].type = GLSL_TYPE_UINT;
2215 temp_dst.writemask = WRITEMASK_X << el;
2216 emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]);
2217 }
2218 result_src = temp;
2219 if (ir->operation == ir_unop_u642u || ir->operation == ir_unop_i642u)
2220 result_src.type = GLSL_TYPE_UINT;
2221 else
2222 result_src.type = GLSL_TYPE_INT;
2223 break;
2224 }
2225 case ir_unop_i642b:
2226 emit_asm(ir, TGSI_OPCODE_U64SNE, result_dst, op[0], st_src_reg_for_int64(0));
2227 break;
2228 case ir_unop_i642f:
2229 emit_asm(ir, TGSI_OPCODE_I642F, result_dst, op[0]);
2230 break;
2231 case ir_unop_u642f:
2232 emit_asm(ir, TGSI_OPCODE_U642F, result_dst, op[0]);
2233 break;
2234 case ir_unop_i642d:
2235 emit_asm(ir, TGSI_OPCODE_I642D, result_dst, op[0]);
2236 break;
2237 case ir_unop_u642d:
2238 emit_asm(ir, TGSI_OPCODE_U642D, result_dst, op[0]);
2239 break;
2240 case ir_unop_i2i64:
2241 emit_asm(ir, TGSI_OPCODE_I2I64, result_dst, op[0]);
2242 break;
2243 case ir_unop_f2i64:
2244 emit_asm(ir, TGSI_OPCODE_F2I64, result_dst, op[0]);
2245 break;
2246 case ir_unop_d2i64:
2247 emit_asm(ir, TGSI_OPCODE_D2I64, result_dst, op[0]);
2248 break;
2249 case ir_unop_i2u64:
2250 emit_asm(ir, TGSI_OPCODE_I2I64, result_dst, op[0]);
2251 break;
2252 case ir_unop_f2u64:
2253 emit_asm(ir, TGSI_OPCODE_F2U64, result_dst, op[0]);
2254 break;
2255 case ir_unop_d2u64:
2256 emit_asm(ir, TGSI_OPCODE_D2U64, result_dst, op[0]);
2257 break;
2258 /* these might be needed */
2259 case ir_unop_pack_snorm_2x16:
2260 case ir_unop_pack_unorm_2x16:
2261 case ir_unop_pack_snorm_4x8:
2262 case ir_unop_pack_unorm_4x8:
2263
2264 case ir_unop_unpack_snorm_2x16:
2265 case ir_unop_unpack_unorm_2x16:
2266 case ir_unop_unpack_snorm_4x8:
2267 case ir_unop_unpack_unorm_4x8:
2268
2269 case ir_quadop_vector:
2270 case ir_binop_vector_extract:
2271 case ir_triop_vector_insert:
2272 case ir_binop_carry:
2273 case ir_binop_borrow:
2274 case ir_unop_ssbo_unsized_array_length:
2275 /* This operation is not supported, or should have already been handled.
2276 */
2277 assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()");
2278 break;
2279 }
2280
2281 this->result = result_src;
2282 }
2283
2284
2285 void
2286 glsl_to_tgsi_visitor::visit(ir_swizzle *ir)
2287 {
2288 st_src_reg src;
2289 int i;
2290 int swizzle[4];
2291
2292 /* Note that this is only swizzles in expressions, not those on the left
2293 * hand side of an assignment, which do write masking. See ir_assignment
2294 * for that.
2295 */
2296
2297 ir->val->accept(this);
2298 src = this->result;
2299 assert(src.file != PROGRAM_UNDEFINED);
2300 assert(ir->type->vector_elements > 0);
2301
2302 for (i = 0; i < 4; i++) {
2303 if (i < ir->type->vector_elements) {
2304 switch (i) {
2305 case 0:
2306 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
2307 break;
2308 case 1:
2309 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
2310 break;
2311 case 2:
2312 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
2313 break;
2314 case 3:
2315 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
2316 break;
2317 }
2318 } else {
2319 /* If the type is smaller than a vec4, replicate the last
2320 * channel out.
2321 */
2322 swizzle[i] = swizzle[ir->type->vector_elements - 1];
2323 }
2324 }
2325
2326 src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
2327
2328 this->result = src;
2329 }
2330
2331 /* Test if the variable is an array. Note that geometry and
2332 * tessellation shader inputs are outputs are always arrays (except
2333 * for patch inputs), so only the array element type is considered.
2334 */
2335 static bool
2336 is_inout_array(unsigned stage, ir_variable *var, bool *remove_array)
2337 {
2338 const glsl_type *type = var->type;
2339
2340 *remove_array = false;
2341
2342 if ((stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_in) ||
2343 (stage == MESA_SHADER_FRAGMENT && var->data.mode == ir_var_shader_out))
2344 return false;
2345
2346 if (((stage == MESA_SHADER_GEOMETRY && var->data.mode == ir_var_shader_in) ||
2347 (stage == MESA_SHADER_TESS_EVAL && var->data.mode == ir_var_shader_in) ||
2348 stage == MESA_SHADER_TESS_CTRL) &&
2349 !var->data.patch) {
2350 if (!var->type->is_array())
2351 return false; /* a system value probably */
2352
2353 type = var->type->fields.array;
2354 *remove_array = true;
2355 }
2356
2357 return type->is_array() || type->is_matrix();
2358 }
2359
2360 static unsigned
2361 st_translate_interp_loc(ir_variable *var)
2362 {
2363 if (var->data.centroid)
2364 return TGSI_INTERPOLATE_LOC_CENTROID;
2365 else if (var->data.sample)
2366 return TGSI_INTERPOLATE_LOC_SAMPLE;
2367 else
2368 return TGSI_INTERPOLATE_LOC_CENTER;
2369 }
2370
/**
 * Resolve a variable dereference to a source register.
 *
 * Storage for the variable is looked up with find_variable_storage().  If
 * none is found, it is created here according to the variable's mode; for
 * shader inputs and outputs this also fills in the next free entry of the
 * inputs[] / outputs[] declaration arrays.  The resulting register is left
 * in this->result.
 */
void
glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
{
   variable_storage *entry = find_variable_storage(ir->var);
   ir_variable *var = ir->var;
   bool remove_array;

   if (!entry) {
      switch (var->data.mode) {
      case ir_var_uniform:
         entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
                                               var->data.param_index);
         _mesa_hash_table_insert(this->variables, var, entry);
         break;
      case ir_var_shader_in: {
         /* The linker assigns locations for varyings and attributes,
          * including deprecated builtins (like gl_Color), user-assigned
          * generic attributes (glBindAttribLocation), and
          * user-defined varyings.
          */
         assert(var->data.location != -1);

         const glsl_type *type_without_array = var->type->without_array();
         /* Claim the next input declaration slot. */
         struct inout_decl *decl = &inputs[num_inputs];
         unsigned component = var->data.location_frac;
         unsigned num_components;
         num_inputs++;

         /* NOTE(review): presumably location_frac counts 32-bit channels,
          * so it is halved to index 64-bit components -- confirm.
          */
         if (type_without_array->is_64bit())
            component = component / 2;
         /* Types that report no vector elements are treated as using all
          * four components.
          */
         if (type_without_array->vector_elements)
            num_components = type_without_array->vector_elements;
         else
            num_components = 4;

         decl->mesa_index = var->data.location;
         decl->interp = (glsl_interp_mode) var->data.interpolation;
         decl->interp_loc = st_translate_interp_loc(var);
         decl->base_type = type_without_array->base_type;
         decl->usage_mask = u_bit_consecutive(component, num_components);

         /* Array ids are 1-based; 0 marks a non-array declaration. */
         if (is_inout_array(shader->Stage, var, &remove_array)) {
            decl->array_id = num_input_arrays + 1;
            num_input_arrays++;
         } else {
            decl->array_id = 0;
         }

         /* When is_inout_array() stripped the outer array level, size the
          * declaration by the element type only.
          */
         if (remove_array)
            decl->size = type_size(var->type->fields.array);
         else
            decl->size = type_size(var->type);

         entry = new(mem_ctx) variable_storage(var,
                                               PROGRAM_INPUT,
                                               decl->mesa_index,
                                               decl->array_id);
         entry->component = component;

         _mesa_hash_table_insert(this->variables, var, entry);

         break;
      }
      case ir_var_shader_out: {
         assert(var->data.location != -1);

         const glsl_type *type_without_array = var->type->without_array();
         /* Claim the next output declaration slot. */
         struct inout_decl *decl = &outputs[num_outputs];
         unsigned component = var->data.location_frac;
         unsigned num_components;
         num_outputs++;

         /* Same component handling as for shader inputs above. */
         if (type_without_array->is_64bit())
            component = component / 2;
         if (type_without_array->vector_elements)
            num_components = type_without_array->vector_elements;
         else
            num_components = 4;

         /* A non-zero data.index moves the output past the first
          * FRAG_RESULT_MAX slots.
          */
         decl->mesa_index = var->data.location + FRAG_RESULT_MAX * var->data.index;
         decl->base_type = type_without_array->base_type;
         decl->usage_mask = u_bit_consecutive(component, num_components);
         if (var->data.stream & (1u << 31)) {
            /* Bit 31 set: the remaining bits are taken over unchanged as
             * the stream assignment.
             */
            decl->gs_out_streams = var->data.stream & ~(1u << 31);
         } else {
            /* Otherwise replicate the single stream id into a 2-bit field
             * for every component this variable writes.
             */
            assert(var->data.stream < 4);
            decl->gs_out_streams = 0;
            for (unsigned i = 0; i < num_components; ++i)
               decl->gs_out_streams |= var->data.stream << (2 * (component + i));
         }

         if (is_inout_array(shader->Stage, var, &remove_array)) {
            decl->array_id = num_output_arrays + 1;
            num_output_arrays++;
         } else {
            decl->array_id = 0;
         }

         if (remove_array)
            decl->size = type_size(var->type->fields.array);
         else
            decl->size = type_size(var->type);

         if (var->data.fb_fetch_output) {
            /* Fetch the current output value into a temporary and make the
             * variable's storage refer to that temporary instead of the
             * output register.
             */
            st_dst_reg dst = st_dst_reg(get_temp(var->type));
            st_src_reg src = st_src_reg(PROGRAM_OUTPUT, decl->mesa_index,
                                        var->type, component, decl->array_id);
            emit_asm(NULL, TGSI_OPCODE_FBFETCH, dst, src);
            entry = new(mem_ctx) variable_storage(var, dst.file, dst.index,
                                                  dst.array_id);
         } else {
            entry = new(mem_ctx) variable_storage(var,
                                                  PROGRAM_OUTPUT,
                                                  decl->mesa_index,
                                                  decl->array_id);
         }
         entry->component = component;

         _mesa_hash_table_insert(this->variables, var, entry);

         break;
      }
      case ir_var_system_value:
         /* Unlike the other cases, this entry is not inserted into
          * this->variables.
          */
         entry = new(mem_ctx) variable_storage(var,
                                               PROGRAM_SYSTEM_VALUE,
                                               var->data.location);
         break;
      case ir_var_auto:
      case ir_var_temporary:
         st_src_reg src = get_temp(var->type);

         entry = new(mem_ctx) variable_storage(var, src.file, src.index,
                                               src.array_id);
         _mesa_hash_table_insert(this->variables, var, entry);

         break;
      }

      /* Any variable mode not handled above leaves entry NULL. */
      if (!entry) {
         printf("Failed to make storage for %s\n", var->name);
         exit(1);
      }
   }

   this->result = st_src_reg(entry->file, entry->index, var->type,
                             entry->component, entry->array_id);
   /* Flag double-typed vertex shader inputs on the result register. */
   if (this->shader->Stage == MESA_SHADER_VERTEX &&
       var->data.mode == ir_var_shader_in &&
       var->type->without_array()->is_double())
      this->result.is_double_vertex_input = true;
   /* Without native integer support every register is typed as float. */
   if (!native_integers)
      this->result.type = GLSL_TYPE_FLOAT;
}
2524
2525 static void
2526 shrink_array_declarations(struct inout_decl *decls, unsigned count,
2527 GLbitfield64* usage_mask,
2528 GLbitfield64 double_usage_mask,
2529 GLbitfield* patch_usage_mask)
2530 {
2531 unsigned i;
2532 int j;
2533
2534 /* Fix array declarations by removing unused array elements at both ends
2535 * of the arrays. For example, mat4[3] where only mat[1] is used.
2536 */
2537 for (i = 0; i < count; i++) {
2538 struct inout_decl *decl = &decls[i];
2539 if (!decl->array_id)
2540 continue;
2541
2542 /* Shrink the beginning. */
2543 for (j = 0; j < (int)decl->size; j++) {
2544 if (decl->mesa_index >= VARYING_SLOT_PATCH0) {
2545 if (*patch_usage_mask &
2546 BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j))
2547 break;
2548 }
2549 else {
2550 if (*usage_mask & BITFIELD64_BIT(decl->mesa_index+j))
2551 break;
2552 if (double_usage_mask & BITFIELD64_BIT(decl->mesa_index+j-1))
2553 break;
2554 }
2555
2556 decl->mesa_index++;
2557 decl->size--;
2558 j--;
2559 }
2560
2561 /* Shrink the end. */
2562 for (j = decl->size-1; j >= 0; j--) {
2563 if (decl->mesa_index >= VARYING_SLOT_PATCH0) {
2564 if (*patch_usage_mask &
2565 BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j))
2566 break;
2567 }
2568 else {
2569 if (*usage_mask & BITFIELD64_BIT(decl->mesa_index+j))
2570 break;
2571 if (double_usage_mask & BITFIELD64_BIT(decl->mesa_index+j-1))
2572 break;
2573 }
2574
2575 decl->size--;
2576 }
2577
2578 /* When not all entries of an array are accessed, we mark them as used
2579 * here anyway, to ensure that the input/output mapping logic doesn't get
2580 * confused.
2581 *
2582 * TODO This happens when an array isn't used via indirect access, which
2583 * some game ports do (at least eON-based). There is an optimization
2584 * opportunity here by replacing the array declaration with non-array
2585 * declarations of those slots that are actually used.
2586 */
2587 for (j = 1; j < (int)decl->size; ++j) {
2588 if (decl->mesa_index >= VARYING_SLOT_PATCH0)
2589 *patch_usage_mask |= BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j);
2590 else
2591 *usage_mask |= BITFIELD64_BIT(decl->mesa_index + j);
2592 }
2593 }
2594 }
2595
2596 void
2597 glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
2598 {
2599 ir_constant *index;
2600 st_src_reg src;
2601 bool is_2D = false;
2602 ir_variable *var = ir->variable_referenced();
2603
2604 /* We only need the logic provided by st_glsl_storage_type_size()
2605 * for arrays of structs. Indirect sampler and image indexing is handled
2606 * elsewhere.
2607 */
2608 int element_size = ir->type->without_array()->is_record() ?
2609 st_glsl_storage_type_size(ir->type, var->data.bindless) :
2610 type_size(ir->type);
2611
2612 index = ir->array_index->constant_expression_value(ralloc_parent(ir));
2613
2614 ir->array->accept(this);
2615 src = this->result;
2616
2617 if (!src.has_index2) {
2618 switch (this->prog->Target) {
2619 case GL_TESS_CONTROL_PROGRAM_NV:
2620 is_2D = (src.file == PROGRAM_INPUT || src.file == PROGRAM_OUTPUT) &&
2621 !ir->variable_referenced()->data.patch;
2622 break;
2623 case GL_TESS_EVALUATION_PROGRAM_NV:
2624 is_2D = src.file == PROGRAM_INPUT &&
2625 !ir->variable_referenced()->data.patch;
2626 break;
2627 case GL_GEOMETRY_PROGRAM_NV:
2628 is_2D = src.file == PROGRAM_INPUT;
2629 break;
2630 }
2631 }
2632
2633 if (is_2D)
2634 element_size = 1;
2635
2636 if (index) {
2637
2638 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
2639 src.file == PROGRAM_INPUT)
2640 element_size = attrib_type_size(ir->type, true);
2641 if (is_2D) {
2642 src.index2D = index->value.i[0];
2643 src.has_index2 = true;
2644 } else
2645 src.index += index->value.i[0] * element_size;
2646 } else {
2647 /* Variable index array dereference. It eats the "vec4" of the
2648 * base of the array and an index that offsets the TGSI register
2649 * index.
2650 */
2651 ir->array_index->accept(this);
2652
2653 st_src_reg index_reg;
2654
2655 if (element_size == 1) {
2656 index_reg = this->result;
2657 } else {
2658 index_reg = get_temp(native_integers ?
2659 glsl_type::int_type : glsl_type::float_type);
2660
2661 emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg),
2662 this->result, st_src_reg_for_type(index_reg.type, element_size));
2663 }
2664
2665 /* If there was already a relative address register involved, add the
2666 * new and the old together to get the new offset.
2667 */
2668 if (!is_2D && src.reladdr != NULL) {
2669 st_src_reg accum_reg = get_temp(native_integers ?
2670 glsl_type::int_type : glsl_type::float_type);
2671
2672 emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg),
2673 index_reg, *src.reladdr);
2674
2675 index_reg = accum_reg;
2676 }
2677
2678 if (is_2D) {
2679 src.reladdr2 = ralloc(mem_ctx, st_src_reg);
2680 memcpy(src.reladdr2, &index_reg, sizeof(index_reg));
2681 src.index2D = 0;
2682 src.has_index2 = true;
2683 } else {
2684 src.reladdr = ralloc(mem_ctx, st_src_reg);
2685 memcpy(src.reladdr, &index_reg, sizeof(index_reg));
2686 }
2687 }
2688
2689 /* Change the register type to the element type of the array. */
2690 src.type = ir->type->base_type;
2691
2692 this->result = src;
2693 }
2694
2695 void
2696 glsl_to_tgsi_visitor::visit(ir_dereference_record *ir)
2697 {
2698 unsigned int i;
2699 const glsl_type *struct_type = ir->record->type;
2700 ir_variable *var = ir->record->variable_referenced();
2701 int offset = 0;
2702
2703 ir->record->accept(this);
2704
2705 assert(ir->field_idx >= 0);
2706 assert(var);
2707 for (i = 0; i < struct_type->length; i++) {
2708 if (i == (unsigned) ir->field_idx)
2709 break;
2710 const glsl_type *member_type = struct_type->fields.structure[i].type;
2711 offset += st_glsl_storage_type_size(member_type, var->data.bindless);
2712 }
2713
2714 /* If the type is smaller than a vec4, replicate the last channel out. */
2715 if (ir->type->is_scalar() || ir->type->is_vector())
2716 this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
2717 else
2718 this->result.swizzle = SWIZZLE_NOOP;
2719
2720 this->result.index += offset;
2721 this->result.type = ir->type->base_type;
2722 }
2723
2724 /**
2725 * We want to be careful in assignment setup to hit the actual storage
2726 * instead of potentially using a temporary like we might with the
2727 * ir_dereference handler.
2728 */
2729 static st_dst_reg
2730 get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v, int *component)
2731 {
2732 /* The LHS must be a dereference. If the LHS is a variable indexed array
2733 * access of a vector, it must be separated into a series conditional moves
2734 * before reaching this point (see ir_vec_index_to_cond_assign).
2735 */
2736 assert(ir->as_dereference());
2737 ir_dereference_array *deref_array = ir->as_dereference_array();
2738 if (deref_array) {
2739 assert(!deref_array->array->type->is_vector());
2740 }
2741
2742 /* Use the rvalue deref handler for the most part. We write swizzles using
2743 * the writemask, but we do extract the base component for enhanced layouts
2744 * from the source swizzle.
2745 */
2746 ir->accept(v);
2747 *component = GET_SWZ(v->result.swizzle, 0);
2748 return st_dst_reg(v->result);
2749 }
2750
/**
 * Process the condition of a conditional assignment
 *
 * Examines the condition of a conditional assignment to generate the optimal
 * first operand of a \c CMP instruction.  If the condition is a relational
 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
 * used as the source for the \c CMP instruction.  Otherwise the comparison
 * is processed to a boolean result, and the boolean result is used as the
 * operand to the CMP instruction.
 *
 * The evaluated condition operand is left in this->result.
 *
 * \return true if the caller has to swap the order of the two value
 *         operands of the conditional move, false otherwise.
 */
bool
glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir)
{
   ir_rvalue *src_ir = ir;
   bool negate = true;
   bool switch_order = false;

   ir_expression *const expr = ir->as_expression();

   if (native_integers) {
      /* With native integers only (in)equality against zero on
       * int/uint/bool operands is simplified: the non-zero operand is used
       * directly as the condition.  No negation is applied on this path.
       */
      if ((expr != NULL) && (expr->num_operands == 2)) {
         enum glsl_base_type type = expr->operands[0]->type->base_type;
         if (type == GLSL_TYPE_INT || type == GLSL_TYPE_UINT ||
             type == GLSL_TYPE_BOOL) {
            if (expr->operation == ir_binop_equal) {
               /* (x == 0): use x as the condition and swap the operands. */
               if (expr->operands[0]->is_zero()) {
                  src_ir = expr->operands[1];
                  switch_order = true;
               }
               else if (expr->operands[1]->is_zero()) {
                  src_ir = expr->operands[0];
                  switch_order = true;
               }
            }
            else if (expr->operation == ir_binop_nequal) {
               /* (x != 0): use x as the condition, operands stay in order. */
               if (expr->operands[0]->is_zero()) {
                  src_ir = expr->operands[1];
               }
               else if (expr->operands[1]->is_zero()) {
                  src_ir = expr->operands[0];
               }
            }
         }
      }

      src_ir->accept(this);
      return switch_order;
   }

   if ((expr != NULL) && (expr->num_operands == 2)) {
      bool zero_on_left = false;

      /* Pick the operand that is compared against zero, if there is one. */
      if (expr->operands[0]->is_zero()) {
         src_ir = expr->operands[1];
         zero_on_left = true;
      } else if (expr->operands[1]->is_zero()) {
         src_ir = expr->operands[0];
         zero_on_left = false;
      }

      /*      a is -  0  +            -  0  +
       * (a <  0)  T  F  F  ( a < 0)  T  F  F
       * (0 <  a)  F  F  T  (-a < 0)  F  F  T
       * (a >= 0)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
       * (0 >= a)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
       *
       * Note that exchanging the order of 0 and 'a' in the comparison simply
       * means that the value of 'a' should be negated.
       */
      if (src_ir != ir) {
         switch (expr->operation) {
         case ir_binop_less:
            switch_order = false;
            negate = zero_on_left;
            break;

         case ir_binop_gequal:
            switch_order = true;
            negate = zero_on_left;
            break;

         default:
            /* This isn't the right kind of comparison after all, so make sure
             * the whole condition is visited.
             */
            src_ir = ir;
            break;
         }
      }
   }

   src_ir->accept(this);

   /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
    * condition we produced is 0.0 or 1.0.  By flipping the sign, we can
    * choose which value TGSI_OPCODE_CMP produces without an extra instruction
    * computing the condition.
    */
   if (negate)
      this->result.negate = ~this->result.negate;

   return switch_order;
}
2854
2855 void
2856 glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type *type,
2857 st_dst_reg *l, st_src_reg *r,
2858 st_src_reg *cond, bool cond_swap)
2859 {
2860 if (type->is_record()) {
2861 for (unsigned int i = 0; i < type->length; i++) {
2862 emit_block_mov(ir, type->fields.structure[i].type, l, r,
2863 cond, cond_swap);
2864 }
2865 return;
2866 }
2867
2868 if (type->is_array()) {
2869 for (unsigned int i = 0; i < type->length; i++) {
2870 emit_block_mov(ir, type->fields.array, l, r, cond, cond_swap);
2871 }
2872 return;
2873 }
2874
2875 if (type->is_matrix()) {
2876 const struct glsl_type *vec_type;
2877
2878 vec_type = glsl_type::get_instance(type->is_double() ? GLSL_TYPE_DOUBLE : GLSL_TYPE_FLOAT,
2879 type->vector_elements, 1);
2880
2881 for (int i = 0; i < type->matrix_columns; i++) {
2882 emit_block_mov(ir, vec_type, l, r, cond, cond_swap);
2883 }
2884 return;
2885 }
2886
2887 assert(type->is_scalar() || type->is_vector());
2888
2889 l->type = type->base_type;
2890 r->type = type->base_type;
2891 if (cond) {
2892 st_src_reg l_src = st_src_reg(*l);
2893
2894 if (l_src.file == PROGRAM_OUTPUT &&
2895 this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
2896 (l_src.index == FRAG_RESULT_DEPTH || l_src.index == FRAG_RESULT_STENCIL)) {
2897 /* This is a special case because the source swizzles will be shifted
2898 * later to account for the difference between GLSL (where they're
2899 * plain floats) and TGSI (where they're Z and Y components). */
2900 l_src.swizzle = SWIZZLE_XXXX;
2901 }
2902
2903 if (native_integers) {
2904 emit_asm(ir, TGSI_OPCODE_UCMP, *l, *cond,
2905 cond_swap ? l_src : *r,
2906 cond_swap ? *r : l_src);
2907 } else {
2908 emit_asm(ir, TGSI_OPCODE_CMP, *l, *cond,
2909 cond_swap ? l_src : *r,
2910 cond_swap ? *r : l_src);
2911 }
2912 } else {
2913 emit_asm(ir, TGSI_OPCODE_MOV, *l, *r);
2914 }
2915 l->index++;
2916 r->index++;
2917 if (type->is_dual_slot()) {
2918 l->index++;
2919 if (r->is_double_vertex_input == false)
2920 r->index++;
2921 }
2922 }
2923
/**
 * Translate an assignment.
 *
 * The right hand side is evaluated first, then the destination register is
 * built from the left hand side.  The destination write mask and the source
 * swizzle are adjusted to each other before the moves are emitted, either
 * as (conditional) block moves or by re-emitting the last instruction of
 * the right hand side expression with the assignment target as destination.
 */
void
glsl_to_tgsi_visitor::visit(ir_assignment *ir)
{
   int dst_component;
   st_dst_reg l;
   st_src_reg r;

   /* all generated instructions need to be flagged as precise */
   this->precise = is_precise(ir->lhs->variable_referenced());
   ir->rhs->accept(this);
   r = this->result;

   l = get_assignment_lhs(ir->lhs, this, &dst_component);

   {
      int swizzles[4];
      int first_enabled_chan = 0;
      int rhs_chan = 0;
      ir_variable *variable = ir->lhs->variable_referenced();

      if (shader->Stage == MESA_SHADER_FRAGMENT &&
          variable->data.mode == ir_var_shader_out &&
          (variable->data.location == FRAG_RESULT_DEPTH ||
           variable->data.location == FRAG_RESULT_STENCIL)) {
         /* Fragment depth is written to the Z channel and stencil to the
          * Y channel of the output register.
          */
         assert(ir->lhs->type->is_scalar());
         assert(ir->write_mask == WRITEMASK_X);

         if (variable->data.location == FRAG_RESULT_DEPTH)
            l.writemask = WRITEMASK_Z;
         else {
            assert(variable->data.location == FRAG_RESULT_STENCIL);
            l.writemask = WRITEMASK_Y;
         }
      } else if (ir->write_mask == 0) {
         /* A zero write mask is only expected for types that are neither
          * scalar nor vector; derive the mask from the innermost element
          * type instead.
          */
         assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());

         unsigned num_elements = ir->lhs->type->without_array()->vector_elements;

         if (num_elements) {
            l.writemask = u_bit_consecutive(0, num_elements);
         } else {
            /* The type is a struct or an array of (array of) structs. */
            l.writemask = WRITEMASK_XYZW;
         }
      } else {
         l.writemask = ir->write_mask;
      }

      /* Remember the source channel that feeds the first written channel;
       * it is used below to fill the channels that are not written.
       */
      for (int i = 0; i < 4; i++) {
         if (l.writemask & (1 << i)) {
            first_enabled_chan = GET_SWZ(r.swizzle, i);
            break;
         }
      }

      /* Move the write mask up to the base component of the destination. */
      l.writemask = l.writemask << dst_component;

      /* Swizzle a small RHS vector into the channels being written.
       *
       * glsl ir treats write_mask as dictating how many channels are
       * present on the RHS while TGSI treats write_mask as just
       * showing which channels of the vec4 RHS get written.
       */
      for (int i = 0; i < 4; i++) {
         if (l.writemask & (1 << i))
            swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
         else
            swizzles[i] = first_enabled_chan;
      }
      r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
                                swizzles[2], swizzles[3]);
   }

   assert(l.file != PROGRAM_UNDEFINED);
   assert(r.file != PROGRAM_UNDEFINED);

   if (ir->condition) {
      const bool switch_order = this->process_move_condition(ir->condition);
      st_src_reg condition = this->result;

      emit_block_mov(ir, ir->lhs->type, &l, &r, &condition, switch_order);
   } else if (ir->rhs->as_expression() &&
              this->instructions.get_tail() &&
              ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir &&
              !((glsl_to_tgsi_instruction *)this->instructions.get_tail())->is_64bit_expanded &&
              type_size(ir->lhs->type) == 1 &&
              l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst[0].writemask) {
      /* To avoid emitting an extra MOV when assigning an expression to a
       * variable, emit the last instruction of the expression again, but
       * replace the destination register with the target of the assignment.
       * Dead code elimination will remove the original instruction.
       */
      glsl_to_tgsi_instruction *inst, *new_inst;
      inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
      new_inst = emit_asm(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2], inst->src[3]);
      new_inst->saturate = inst->saturate;
      new_inst->resource = inst->resource;
      /* Mark everything the original instruction wrote as dead. */
      inst->dead_mask = inst->dst[0].writemask;
   } else {
      emit_block_mov(ir, ir->rhs->type, &l, &r, NULL, false);
   }
   /* Precise marking only applies to the instructions of this assignment. */
   this->precise = 0;
}
3027
3028
3029 void
3030 glsl_to_tgsi_visitor::visit(ir_constant *ir)
3031 {
3032 st_src_reg src;
3033 GLdouble stack_vals[4] = { 0 };
3034 gl_constant_value *values = (gl_constant_value *) stack_vals;
3035 GLenum gl_type = GL_NONE;
3036 unsigned int i;
3037 static int in_array = 0;
3038 gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE;
3039
3040 /* Unfortunately, 4 floats is all we can get into
3041 * _mesa_add_typed_unnamed_constant. So, make a temp to store an
3042 * aggregate constant and move each constant value into it. If we
3043 * get lucky, copy propagation will eliminate the extra moves.
3044 */
3045 if (ir->type->is_record()) {
3046 st_src_reg temp_base = get_temp(ir->type);
3047 st_dst_reg temp = st_dst_reg(temp_base);
3048
3049 for (i = 0; i < ir->type->length; i++) {
3050 ir_constant *const field_value = ir->get_record_field(i);
3051 int size = type_size(field_value->type);
3052
3053 assert(size > 0);
3054
3055 field_value->accept(this);
3056 src = this->result;
3057
3058 for (unsigned j = 0; j < (unsigned int)size; j++) {
3059 emit_asm(ir, TGSI_OPCODE_MOV, temp, src);
3060
3061 src.index++;
3062 temp.index++;
3063 }
3064 }
3065 this->result = temp_base;
3066 return;
3067 }
3068
3069 if (ir->type->is_array()) {
3070 st_src_reg temp_base = get_temp(ir->type);
3071 st_dst_reg temp = st_dst_reg(temp_base);
3072 int size = type_size(ir->type->fields.array);
3073
3074 assert(size > 0);
3075 in_array++;
3076
3077 for (i = 0; i < ir->type->length; i++) {
3078 ir->const_elements[i]->accept(this);
3079 src = this->result;
3080 for (int j = 0; j < size; j++) {
3081 emit_asm(ir, TGSI_OPCODE_MOV, temp, src);
3082
3083 src.index++;
3084 temp.index++;
3085 }
3086 }
3087 this->result = temp_base;
3088 in_array--;
3089 return;
3090 }
3091
3092 if (ir->type->is_matrix()) {
3093 st_src_reg mat = get_temp(ir->type);
3094 st_dst_reg mat_column = st_dst_reg(mat);
3095
3096 for (i = 0; i < ir->type->matrix_columns; i++) {
3097 switch (ir->type->base_type) {
3098 case GLSL_TYPE_FLOAT:
3099 values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];
3100
3101 src = st_src_reg(file, -1, ir->type->base_type);
3102 src.index = add_constant(file,
3103 values,
3104 ir->type->vector_elements,
3105 GL_FLOAT,
3106 &src.swizzle);
3107 emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
3108 break;
3109 case GLSL_TYPE_DOUBLE:
3110 values = (gl_constant_value *) &ir->value.d[i * ir->type->vector_elements];
3111 src = st_src_reg(file, -1, ir->type->base_type);
3112 src.index = add_constant(file,
3113 values,
3114 ir->type->vector_elements,
3115 GL_DOUBLE,
3116 &src.swizzle);
3117 if (ir->type->vector_elements >= 2) {
3118 mat_column.writemask = WRITEMASK_XY;
3119 src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
3120 emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
3121 } else {
3122 mat_column.writemask = WRITEMASK_X;
3123 src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
3124 emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
3125 }
3126 src.index++;
3127 if (ir->type->vector_elements > 2) {
3128 if (ir->type->vector_elements == 4) {
3129 mat_column.writemask = WRITEMASK_ZW;
3130 src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
3131 emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
3132 } else {
3133 mat_column.writemask = WRITEMASK_Z;
3134 src.swizzle = MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y);
3135 emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src);
3136 mat_column.writemask = WRITEMASK_XYZW;
3137 src.swizzle = SWIZZLE_XYZW;
3138 }
3139 mat_column.index++;
3140 }
3141 break;
3142 default:
3143 unreachable("Illegal matrix constant type.\n");
3144 break;
3145 }
3146 mat_column.index++;
3147 }
3148 this->result = mat;
3149 return;
3150 }
3151
3152 switch (ir->type->base_type) {
3153 case GLSL_TYPE_FLOAT:
3154 gl_type = GL_FLOAT;
3155 for (i = 0; i < ir->type->vector_elements; i++) {
3156 values[i].f = ir->value.f[i];
3157 }
3158 break;
3159 case GLSL_TYPE_DOUBLE:
3160 gl_type = GL_DOUBLE;
3161 for (i = 0; i < ir->type->vector_elements; i++) {
3162 memcpy(&values[i * 2], &ir->value.d[i], sizeof(double));
3163 }
3164 break;
3165 case GLSL_TYPE_INT64:
3166 gl_type = GL_INT64_ARB;
3167 for (i = 0; i < ir->type->vector_elements; i++) {
3168 memcpy(&values[i * 2], &ir->value.d[i], sizeof(int64_t));
3169 }
3170 break;
3171 case GLSL_TYPE_UINT64:
3172 gl_type = GL_UNSIGNED_INT64_ARB;
3173 for (i = 0; i < ir->type->vector_elements; i++) {
3174 memcpy(&values[i * 2], &ir->value.d[i], sizeof(uint64_t));
3175 }
3176 break;
3177 case GLSL_TYPE_UINT:
3178 gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT;
3179 for (i = 0; i < ir->type->vector_elements; i++) {
3180 if (native_integers)
3181 values[i].u = ir->value.u[i];
3182 else
3183 values[i].f = ir->value.u[i];
3184 }
3185 break;
3186 case GLSL_TYPE_INT:
3187 gl_type = native_integers ? GL_INT : GL_FLOAT;
3188 for (i = 0; i < ir->type->vector_elements; i++) {
3189 if (native_integers)
3190 values[i].i = ir->value.i[i];
3191 else
3192 values[i].f = ir->value.i[i];
3193 }
3194 break;
3195 case GLSL_TYPE_BOOL:
3196 gl_type = native_integers ? GL_BOOL : GL_FLOAT;
3197 for (i = 0; i < ir->type->vector_elements; i++) {
3198 values[i].u = ir->value.b[i] ? ctx->Const.UniformBooleanTrue : 0;
3199 }
3200 break;
3201 default:
3202 assert(!"Non-float/uint/int/bool constant");
3203 }
3204
3205 this->result = st_src_reg(file, -1, ir->type);
3206 this->result.index = add_constant(file,
3207 values,
3208 ir->type->vector_elements,
3209 gl_type,
3210 &this->result.swizzle);
3211 }
3212
3213 void
3214 glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
3215 {
3216 exec_node *param = ir->actual_parameters.get_head();
3217 ir_dereference *deref = static_cast<ir_dereference *>(param);
3218 ir_variable *location = deref->variable_referenced();
3219 bool has_hw_atomics = st_context(ctx)->has_hw_atomics;
3220 /* Calculate the surface offset */
3221 st_src_reg offset;
3222 unsigned array_size = 0, base = 0;
3223 uint16_t index = 0;
3224 st_src_reg resource;
3225
3226 get_deref_offsets(deref, &array_size, &base, &index, &offset, false);
3227
3228 if (has_hw_atomics) {
3229 variable_storage *entry = find_variable_storage(location);
3230 st_src_reg buffer(PROGRAM_HW_ATOMIC, 0, GLSL_TYPE_ATOMIC_UINT, location->data.binding);
3231
3232 if (!entry) {
3233 entry = new(mem_ctx) variable_storage(location, PROGRAM_HW_ATOMIC,
3234 num_atomics);
3235 _mesa_hash_table_insert(this->variables, location, entry);
3236
3237 atomic_info[num_atomics].location = location->data.location;
3238 atomic_info[num_atomics].binding = location->data.binding;
3239 atomic_info[num_atomics].size = location->type->arrays_of_arrays_size();
3240 if (atomic_info[num_atomics].size == 0)
3241 atomic_info[num_atomics].size = 1;
3242 atomic_info[num_atomics].array_id = 0;
3243 num_atomics++;
3244 }
3245
3246 if (offset.file != PROGRAM_UNDEFINED) {
3247 if (atomic_info[entry->index].array_id == 0) {
3248 num_atomic_arrays++;
3249 atomic_info[entry->index].array_id = num_atomic_arrays;
3250 }
3251 buffer.array_id = atomic_info[entry->index].array_id;
3252 }
3253
3254 buffer.index = index;
3255 buffer.index += location->data.offset / ATOMIC_COUNTER_SIZE;
3256 buffer.has_index2 = true;
3257
3258 if (offset.file != PROGRAM_UNDEFINED) {
3259 buffer.reladdr = ralloc(mem_ctx, st_src_reg);
3260 *buffer.reladdr = offset;
3261 emit_arl(ir, sampler_reladdr, offset);
3262 }
3263 offset = st_src_reg_for_int(0);
3264
3265 resource = buffer;
3266 } else {
3267 st_src_reg buffer(PROGRAM_BUFFER, location->data.binding,
3268 GLSL_TYPE_ATOMIC_UINT);
3269
3270 if (offset.file != PROGRAM_UNDEFINED) {
3271 emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(offset),
3272 offset, st_src_reg_for_int(ATOMIC_COUNTER_SIZE));
3273 emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(offset),
3274 offset, st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE));
3275 } else {
3276 offset = st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE);
3277 }
3278 resource = buffer;
3279 }
3280
3281 ir->return_deref->accept(this);
3282 st_dst_reg dst(this->result);
3283 dst.writemask = WRITEMASK_X;
3284
3285 glsl_to_tgsi_instruction *inst;
3286
3287 if (ir->callee->intrinsic_id == ir_intrinsic_atomic_counter_read) {
3288 inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, offset);
3289 } else if (ir->callee->intrinsic_id == ir_intrinsic_atomic_counter_increment) {
3290 inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset,
3291 st_src_reg_for_int(1));
3292 } else if (ir->callee->intrinsic_id == ir_intrinsic_atomic_counter_predecrement) {
3293 inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset,
3294 st_src_reg_for_int(-1));
3295 emit_asm(ir, TGSI_OPCODE_ADD, dst, this->result, st_src_reg_for_int(-1));
3296 } else {
3297 param = param->get_next();
3298 ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
3299 val->accept(this);
3300
3301 st_src_reg data = this->result, data2 = undef_src;
3302 unsigned opcode;
3303 switch (ir->callee->intrinsic_id) {
3304 case ir_intrinsic_atomic_counter_add:
3305 opcode = TGSI_OPCODE_ATOMUADD;
3306 break;
3307 case ir_intrinsic_atomic_counter_min:
3308 opcode = TGSI_OPCODE_ATOMIMIN;
3309 break;
3310 case ir_intrinsic_atomic_counter_max:
3311 opcode = TGSI_OPCODE_ATOMIMAX;
3312 break;
3313 case ir_intrinsic_atomic_counter_and:
3314 opcode = TGSI_OPCODE_ATOMAND;
3315 break;
3316 case ir_intrinsic_atomic_counter_or:
3317 opcode = TGSI_OPCODE_ATOMOR;
3318 break;
3319 case ir_intrinsic_atomic_counter_xor:
3320 opcode = TGSI_OPCODE_ATOMXOR;
3321 break;
3322 case ir_intrinsic_atomic_counter_exchange:
3323 opcode = TGSI_OPCODE_ATOMXCHG;
3324 break;
3325 case ir_intrinsic_atomic_counter_comp_swap: {
3326 opcode = TGSI_OPCODE_ATOMCAS;
3327 param = param->get_next();
3328 val = ((ir_instruction *)param)->as_rvalue();
3329 val->accept(this);
3330 data2 = this->result;
3331 break;
3332 }
3333 default:
3334 assert(!"Unexpected intrinsic");
3335 return;
3336 }
3337
3338 inst = emit_asm(ir, opcode, dst, offset, data, data2);
3339 }
3340
3341 inst->resource = resource;
3342 }
3343
3344 void
3345 glsl_to_tgsi_visitor::visit_ssbo_intrinsic(ir_call *ir)
3346 {
3347 exec_node *param = ir->actual_parameters.get_head();
3348
3349 ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();
3350
3351 param = param->get_next();
3352 ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
3353
3354 ir_constant *const_block = block->as_constant();
3355 int buf_base = st_context(ctx)->has_hw_atomics ? 0 : ctx->Const.Program[shader->Stage].MaxAtomicBuffers;
3356 st_src_reg buffer(
3357 PROGRAM_BUFFER,
3358 buf_base + (const_block ? const_block->value.u[0] : 0),
3359 GLSL_TYPE_UINT);
3360
3361 if (!const_block) {
3362 block->accept(this);
3363 buffer.reladdr = ralloc(mem_ctx, st_src_reg);
3364 *buffer.reladdr = this->result;
3365 emit_arl(ir, sampler_reladdr, this->result);
3366 }
3367
3368 /* Calculate the surface offset */
3369 offset->accept(this);
3370 st_src_reg off = this->result;
3371
3372 st_dst_reg dst = undef_dst;
3373 if (ir->return_deref) {
3374 ir->return_deref->accept(this);
3375 dst = st_dst_reg(this->result);
3376 dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1;
3377 }
3378
3379 glsl_to_tgsi_instruction *inst;
3380
3381 if (ir->callee->intrinsic_id == ir_intrinsic_ssbo_load) {
3382 inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off);
3383 if (dst.type == GLSL_TYPE_BOOL)
3384 emit_asm(ir, TGSI_OPCODE_USNE, dst, st_src_reg(dst), st_src_reg_for_int(0));
3385 } else if (ir->callee->intrinsic_id == ir_intrinsic_ssbo_store) {
3386 param = param->get_next();
3387 ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
3388 val->accept(this);
3389
3390 param = param->get_next();
3391 ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
3392 assert(write_mask);
3393 dst.writemask = write_mask->value.u[0];
3394
3395 dst.type = this->result.type;
3396 inst = emit_asm(ir, TGSI_OPCODE_STORE, dst, off, this->result);
3397 } else {
3398 param = param->get_next();
3399 ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
3400 val->accept(this);
3401
3402 st_src_reg data = this->result, data2 = undef_src;
3403 unsigned opcode;
3404 switch (ir->callee->intrinsic_id) {
3405 case ir_intrinsic_ssbo_atomic_add:
3406 opcode = TGSI_OPCODE_ATOMUADD;
3407 break;
3408 case ir_intrinsic_ssbo_atomic_min:
3409 opcode = TGSI_OPCODE_ATOMIMIN;
3410 break;
3411 case ir_intrinsic_ssbo_atomic_max:
3412 opcode = TGSI_OPCODE_ATOMIMAX;
3413 break;
3414 case ir_intrinsic_ssbo_atomic_and:
3415 opcode = TGSI_OPCODE_ATOMAND;
3416 break;
3417 case ir_intrinsic_ssbo_atomic_or:
3418 opcode = TGSI_OPCODE_ATOMOR;
3419 break;
3420 case ir_intrinsic_ssbo_atomic_xor:
3421 opcode = TGSI_OPCODE_ATOMXOR;
3422 break;
3423 case ir_intrinsic_ssbo_atomic_exchange:
3424 opcode = TGSI_OPCODE_ATOMXCHG;
3425 break;
3426 case ir_intrinsic_ssbo_atomic_comp_swap:
3427 opcode = TGSI_OPCODE_ATOMCAS;
3428 param = param->get_next();
3429 val = ((ir_instruction *)param)->as_rvalue();
3430 val->accept(this);
3431 data2 = this->result;
3432 break;
3433 default:
3434 assert(!"Unexpected intrinsic");
3435 return;
3436 }
3437
3438 inst = emit_asm(ir, opcode, dst, off, data, data2);
3439 }
3440
3441 param = param->get_next();
3442 ir_constant *access = NULL;
3443 if (!param->is_tail_sentinel()) {
3444 access = ((ir_instruction *)param)->as_constant();
3445 assert(access);
3446 }
3447
3448 add_buffer_to_load_and_stores(inst, &buffer, &this->instructions, access);
3449 }
3450
3451 void
3452 glsl_to_tgsi_visitor::visit_membar_intrinsic(ir_call *ir)
3453 {
3454 switch (ir->callee->intrinsic_id) {
3455 case ir_intrinsic_memory_barrier:
3456 emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
3457 st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER |
3458 TGSI_MEMBAR_ATOMIC_BUFFER |
3459 TGSI_MEMBAR_SHADER_IMAGE |
3460 TGSI_MEMBAR_SHARED));
3461 break;
3462 case ir_intrinsic_memory_barrier_atomic_counter:
3463 emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
3464 st_src_reg_for_int(TGSI_MEMBAR_ATOMIC_BUFFER));
3465 break;
3466 case ir_intrinsic_memory_barrier_buffer:
3467 emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
3468 st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER));
3469 break;
3470 case ir_intrinsic_memory_barrier_image:
3471 emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
3472 st_src_reg_for_int(TGSI_MEMBAR_SHADER_IMAGE));
3473 break;
3474 case ir_intrinsic_memory_barrier_shared:
3475 emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
3476 st_src_reg_for_int(TGSI_MEMBAR_SHARED));
3477 break;
3478 case ir_intrinsic_group_memory_barrier:
3479 emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst,
3480 st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER |
3481 TGSI_MEMBAR_ATOMIC_BUFFER |
3482 TGSI_MEMBAR_SHADER_IMAGE |
3483 TGSI_MEMBAR_SHARED |
3484 TGSI_MEMBAR_THREAD_GROUP));
3485 break;
3486 default:
3487 assert(!"Unexpected memory barrier intrinsic");
3488 }
3489 }
3490
3491 void
3492 glsl_to_tgsi_visitor::visit_shared_intrinsic(ir_call *ir)
3493 {
3494 exec_node *param = ir->actual_parameters.get_head();
3495
3496 ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();
3497
3498 st_src_reg buffer(PROGRAM_MEMORY, 0, GLSL_TYPE_UINT);
3499
3500 /* Calculate the surface offset */
3501 offset->accept(this);
3502 st_src_reg off = this->result;
3503
3504 st_dst_reg dst = undef_dst;
3505 if (ir->return_deref) {
3506 ir->return_deref->accept(this);
3507 dst = st_dst_reg(this->result);
3508 dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1;
3509 }
3510
3511 glsl_to_tgsi_instruction *inst;
3512
3513 if (ir->callee->intrinsic_id == ir_intrinsic_shared_load) {
3514 inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off);
3515 inst->resource = buffer;
3516 } else if (ir->callee->intrinsic_id == ir_intrinsic_shared_store) {
3517 param = param->get_next();
3518 ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
3519 val->accept(this);
3520
3521 param = param->get_next();
3522 ir_constant *write_mask = ((ir_instruction *)param)->as_constant();
3523 assert(write_mask);
3524 dst.writemask = write_mask->value.u[0];
3525
3526 dst.type = this->result.type;
3527 inst = emit_asm(ir, TGSI_OPCODE_STORE, dst, off, this->result);
3528 inst->resource = buffer;
3529 } else {
3530 param = param->get_next();
3531 ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
3532 val->accept(this);
3533
3534 st_src_reg data = this->result, data2 = undef_src;
3535 unsigned opcode;
3536 switch (ir->callee->intrinsic_id) {
3537 case ir_intrinsic_shared_atomic_add:
3538 opcode = TGSI_OPCODE_ATOMUADD;
3539 break;
3540 case ir_intrinsic_shared_atomic_min:
3541 opcode = TGSI_OPCODE_ATOMIMIN;
3542 break;
3543 case ir_intrinsic_shared_atomic_max:
3544 opcode = TGSI_OPCODE_ATOMIMAX;
3545 break;
3546 case ir_intrinsic_shared_atomic_and:
3547 opcode = TGSI_OPCODE_ATOMAND;
3548 break;
3549 case ir_intrinsic_shared_atomic_or:
3550 opcode = TGSI_OPCODE_ATOMOR;
3551 break;
3552 case ir_intrinsic_shared_atomic_xor:
3553 opcode = TGSI_OPCODE_ATOMXOR;
3554 break;
3555 case ir_intrinsic_shared_atomic_exchange:
3556 opcode = TGSI_OPCODE_ATOMXCHG;
3557 break;
3558 case ir_intrinsic_shared_atomic_comp_swap:
3559 opcode = TGSI_OPCODE_ATOMCAS;
3560 param = param->get_next();
3561 val = ((ir_instruction *)param)->as_rvalue();
3562 val->accept(this);
3563 data2 = this->result;
3564 break;
3565 default:
3566 assert(!"Unexpected intrinsic");
3567 return;
3568 }
3569
3570 inst = emit_asm(ir, opcode, dst, off, data, data2);
3571 inst->resource = buffer;
3572 }
3573 }
3574
3575 static void
3576 get_image_qualifiers(ir_dereference *ir, const glsl_type **type,
3577 bool *memory_coherent, bool *memory_volatile,
3578 bool *memory_restrict, unsigned *image_format)
3579 {
3580
3581 switch (ir->ir_type) {
3582 case ir_type_dereference_record: {
3583 ir_dereference_record *deref_record = ir->as_dereference_record();
3584 const glsl_type *struct_type = deref_record->record->type;
3585 int fild_idx = deref_record->field_idx;
3586
3587 *type = struct_type->fields.structure[fild_idx].type->without_array();
3588 *memory_coherent =
3589 struct_type->fields.structure[fild_idx].memory_coherent;
3590 *memory_volatile =
3591 struct_type->fields.structure[fild_idx].memory_volatile;
3592 *memory_restrict =
3593 struct_type->fields.structure[fild_idx].memory_restrict;
3594 *image_format =
3595 struct_type->fields.structure[fild_idx].image_format;
3596 break;
3597 }
3598
3599 case ir_type_dereference_array: {
3600 ir_dereference_array *deref_arr = ir->as_dereference_array();
3601 get_image_qualifiers((ir_dereference *)deref_arr->array, type,
3602 memory_coherent, memory_volatile, memory_restrict,
3603 image_format);
3604 break;
3605 }
3606
3607 case ir_type_dereference_variable: {
3608 ir_variable *var = ir->variable_referenced();
3609
3610 *type = var->type->without_array();
3611 *memory_coherent = var->data.memory_coherent;
3612 *memory_volatile = var->data.memory_volatile;
3613 *memory_restrict = var->data.memory_restrict;
3614 *image_format = var->data.image_format;
3615 break;
3616 }
3617
3618 default:
3619 break;
3620 }
3621 }
3622
3623 void
3624 glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir)
3625 {
3626 exec_node *param = ir->actual_parameters.get_head();
3627
3628 ir_dereference *img = (ir_dereference *)param;
3629 const ir_variable *imgvar = img->variable_referenced();
3630 unsigned sampler_array_size = 1, sampler_base = 0;
3631 bool memory_coherent = false, memory_volatile = false, memory_restrict = false;
3632 unsigned image_format = 0;
3633 const glsl_type *type = NULL;
3634
3635 get_image_qualifiers(img, &type, &memory_coherent, &memory_volatile,
3636 &memory_restrict, &image_format);
3637
3638 st_src_reg reladdr;
3639 st_src_reg image(PROGRAM_IMAGE, 0, GLSL_TYPE_UINT);
3640 uint16_t index = 0;
3641 get_deref_offsets(img, &sampler_array_size, &sampler_base,
3642 &index, &reladdr, !imgvar->contains_bindless());
3643
3644 image.index = index;
3645 if (reladdr.file != PROGRAM_UNDEFINED) {
3646 image.reladdr = ralloc(mem_ctx, st_src_reg);
3647 *image.reladdr = reladdr;
3648 emit_arl(ir, sampler_reladdr, reladdr);
3649 }
3650
3651 st_dst_reg dst = undef_dst;
3652 if (ir->return_deref) {
3653 ir->return_deref->accept(this);
3654 dst = st_dst_reg(this->result);
3655 dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1;
3656 }
3657
3658 glsl_to_tgsi_instruction *inst;
3659
3660 st_src_reg bindless;
3661 if (imgvar->contains_bindless()) {
3662 img->accept(this);
3663 bindless = this->result;
3664 }
3665
3666 if (ir->callee->intrinsic_id == ir_intrinsic_image_size) {
3667 dst.writemask = WRITEMASK_XYZ;
3668 inst = emit_asm(ir, TGSI_OPCODE_RESQ, dst);
3669 } else if (ir->callee->intrinsic_id == ir_intrinsic_image_samples) {
3670 st_src_reg res = get_temp(glsl_type::ivec4_type);
3671 st_dst_reg dstres = st_dst_reg(res);
3672 dstres.writemask = WRITEMASK_W;
3673 inst = emit_asm(ir, TGSI_OPCODE_RESQ, dstres);
3674 res.swizzle = SWIZZLE_WWWW;
3675 emit_asm(ir, TGSI_OPCODE_MOV, dst, res);
3676 } else {
3677 st_src_reg arg1 = undef_src, arg2 = undef_src;
3678 st_src_reg coord;
3679 st_dst_reg coord_dst;
3680 coord = get_temp(glsl_type::ivec4_type);
3681 coord_dst = st_dst_reg(coord);
3682 coord_dst.writemask = (1 << type->coordinate_components()) - 1;
3683 param = param->get_next();
3684 ((ir_dereference *)param)->accept(this);
3685 emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
3686 coord.swizzle = SWIZZLE_XXXX;
3687 switch (type->coordinate_components()) {
3688 case 4: assert(!"unexpected coord count");
3689 /* fallthrough */
3690 case 3: coord.swizzle |= SWIZZLE_Z << 6;
3691 /* fallthrough */
3692 case 2: coord.swizzle |= SWIZZLE_Y << 3;
3693 }
3694
3695 if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) {
3696 param = param->get_next();
3697 ((ir_dereference *)param)->accept(this);
3698 st_src_reg sample = this->result;
3699 sample.swizzle = SWIZZLE_XXXX;
3700 coord_dst.writemask = WRITEMASK_W;
3701 emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, sample);
3702 coord.swizzle |= SWIZZLE_W << 9;
3703 }
3704
3705 param = param->get_next();
3706 if (!param->is_tail_sentinel()) {
3707 ((ir_dereference *)param)->accept(this);
3708 arg1 = this->result;
3709 param = param->get_next();
3710 }
3711
3712 if (!param->is_tail_sentinel()) {
3713 ((ir_dereference *)param)->accept(this);
3714 arg2 = this->result;
3715 param = param->get_next();
3716 }
3717
3718 assert(param->is_tail_sentinel());
3719
3720 unsigned opcode;
3721 switch (ir->callee->intrinsic_id) {
3722 case ir_intrinsic_image_load:
3723 opcode = TGSI_OPCODE_LOAD;
3724 break;
3725 case ir_intrinsic_image_store:
3726 opcode = TGSI_OPCODE_STORE;
3727 break;
3728 case ir_intrinsic_image_atomic_add:
3729 opcode = TGSI_OPCODE_ATOMUADD;
3730 break;
3731 case ir_intrinsic_image_atomic_min:
3732 opcode = TGSI_OPCODE_ATOMIMIN;
3733 break;
3734 case ir_intrinsic_image_atomic_max:
3735 opcode = TGSI_OPCODE_ATOMIMAX;
3736 break;
3737 case ir_intrinsic_image_atomic_and:
3738 opcode = TGSI_OPCODE_ATOMAND;
3739 break;
3740 case ir_intrinsic_image_atomic_or:
3741 opcode = TGSI_OPCODE_ATOMOR;
3742 break;
3743 case ir_intrinsic_image_atomic_xor:
3744 opcode = TGSI_OPCODE_ATOMXOR;
3745 break;
3746 case ir_intrinsic_image_atomic_exchange:
3747 opcode = TGSI_OPCODE_ATOMXCHG;
3748 break;
3749 case ir_intrinsic_image_atomic_comp_swap:
3750 opcode = TGSI_OPCODE_ATOMCAS;
3751 break;
3752 default:
3753 assert(!"Unexpected intrinsic");
3754 return;
3755 }
3756
3757 inst = emit_asm(ir, opcode, dst, coord, arg1, arg2);
3758 if (opcode == TGSI_OPCODE_STORE)
3759 inst->dst[0].writemask = WRITEMASK_XYZW;
3760 }
3761
3762 if (imgvar->contains_bindless()) {
3763 inst->resource = bindless;
3764 inst->resource.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
3765 SWIZZLE_X, SWIZZLE_Y);
3766 } else {
3767 inst->resource = image;
3768 inst->sampler_array_size = sampler_array_size;
3769 inst->sampler_base = sampler_base;
3770 }
3771
3772 inst->tex_target = type->sampler_index();
3773 inst->image_format = st_mesa_format_to_pipe_format(st_context(ctx),
3774 _mesa_get_shader_image_format(image_format));
3775
3776 if (memory_coherent)
3777 inst->buffer_access |= TGSI_MEMORY_COHERENT;
3778 if (memory_restrict)
3779 inst->buffer_access |= TGSI_MEMORY_RESTRICT;
3780 if (memory_volatile)
3781 inst->buffer_access |= TGSI_MEMORY_VOLATILE;
3782 }
3783
3784 void
3785 glsl_to_tgsi_visitor::visit_generic_intrinsic(ir_call *ir, unsigned op)
3786 {
3787 ir->return_deref->accept(this);
3788 st_dst_reg dst = st_dst_reg(this->result);
3789
3790 dst.writemask = u_bit_consecutive(0, ir->return_deref->var->type->vector_elements);
3791
3792 st_src_reg src[4] = { undef_src, undef_src, undef_src, undef_src };
3793 unsigned num_src = 0;
3794 foreach_in_list(ir_rvalue, param, &ir->actual_parameters) {
3795 assert(num_src < ARRAY_SIZE(src));
3796
3797 this->result.file = PROGRAM_UNDEFINED;
3798 param->accept(this);
3799 assert(this->result.file != PROGRAM_UNDEFINED);
3800
3801 src[num_src] = this->result;
3802 num_src++;
3803 }
3804
3805 emit_asm(ir, op, dst, src[0], src[1], src[2], src[3]);
3806 }
3807
3808 void
3809 glsl_to_tgsi_visitor::visit(ir_call *ir)
3810 {
3811 ir_function_signature *sig = ir->callee;
3812
3813 /* Filter out intrinsics */
3814 switch (sig->intrinsic_id) {
3815 case ir_intrinsic_atomic_counter_read:
3816 case ir_intrinsic_atomic_counter_increment:
3817 case ir_intrinsic_atomic_counter_predecrement:
3818 case ir_intrinsic_atomic_counter_add:
3819 case ir_intrinsic_atomic_counter_min:
3820 case ir_intrinsic_atomic_counter_max:
3821 case ir_intrinsic_atomic_counter_and:
3822 case ir_intrinsic_atomic_counter_or:
3823 case ir_intrinsic_atomic_counter_xor:
3824 case ir_intrinsic_atomic_counter_exchange:
3825 case ir_intrinsic_atomic_counter_comp_swap:
3826 visit_atomic_counter_intrinsic(ir);
3827 return;
3828
3829 case ir_intrinsic_ssbo_load:
3830 case ir_intrinsic_ssbo_store:
3831 case ir_intrinsic_ssbo_atomic_add:
3832 case ir_intrinsic_ssbo_atomic_min:
3833 case ir_intrinsic_ssbo_atomic_max:
3834 case ir_intrinsic_ssbo_atomic_and:
3835 case ir_intrinsic_ssbo_atomic_or:
3836 case ir_intrinsic_ssbo_atomic_xor:
3837 case ir_intrinsic_ssbo_atomic_exchange:
3838 case ir_intrinsic_ssbo_atomic_comp_swap:
3839 visit_ssbo_intrinsic(ir);
3840 return;
3841
3842 case ir_intrinsic_memory_barrier:
3843 case ir_intrinsic_memory_barrier_atomic_counter:
3844 case ir_intrinsic_memory_barrier_buffer:
3845 case ir_intrinsic_memory_barrier_image:
3846 case ir_intrinsic_memory_barrier_shared:
3847 case ir_intrinsic_group_memory_barrier:
3848 visit_membar_intrinsic(ir);
3849 return;
3850
3851 case ir_intrinsic_shared_load:
3852 case ir_intrinsic_shared_store:
3853 case ir_intrinsic_shared_atomic_add:
3854 case ir_intrinsic_shared_atomic_min:
3855 case ir_intrinsic_shared_atomic_max:
3856 case ir_intrinsic_shared_atomic_and:
3857 case ir_intrinsic_shared_atomic_or:
3858 case ir_intrinsic_shared_atomic_xor:
3859 case ir_intrinsic_shared_atomic_exchange:
3860 case ir_intrinsic_shared_atomic_comp_swap:
3861 visit_shared_intrinsic(ir);
3862 return;
3863
3864 case ir_intrinsic_image_load:
3865 case ir_intrinsic_image_store:
3866 case ir_intrinsic_image_atomic_add:
3867 case ir_intrinsic_image_atomic_min:
3868 case ir_intrinsic_image_atomic_max:
3869 case ir_intrinsic_image_atomic_and:
3870 case ir_intrinsic_image_atomic_or:
3871 case ir_intrinsic_image_atomic_xor:
3872 case ir_intrinsic_image_atomic_exchange:
3873 case ir_intrinsic_image_atomic_comp_swap:
3874 case ir_intrinsic_image_size:
3875 case ir_intrinsic_image_samples:
3876 visit_image_intrinsic(ir);
3877 return;
3878
3879 case ir_intrinsic_shader_clock:
3880 visit_generic_intrinsic(ir, TGSI_OPCODE_CLOCK);
3881 return;
3882
3883 case ir_intrinsic_vote_all:
3884 visit_generic_intrinsic(ir, TGSI_OPCODE_VOTE_ALL);
3885 return;
3886 case ir_intrinsic_vote_any:
3887 visit_generic_intrinsic(ir, TGSI_OPCODE_VOTE_ANY);
3888 return;
3889 case ir_intrinsic_vote_eq:
3890 visit_generic_intrinsic(ir, TGSI_OPCODE_VOTE_EQ);
3891 return;
3892 case ir_intrinsic_ballot:
3893 visit_generic_intrinsic(ir, TGSI_OPCODE_BALLOT);
3894 return;
3895 case ir_intrinsic_read_first_invocation:
3896 visit_generic_intrinsic(ir, TGSI_OPCODE_READ_FIRST);
3897 return;
3898 case ir_intrinsic_read_invocation:
3899 visit_generic_intrinsic(ir, TGSI_OPCODE_READ_INVOC);
3900 return;
3901
3902 case ir_intrinsic_invalid:
3903 case ir_intrinsic_generic_load:
3904 case ir_intrinsic_generic_store:
3905 case ir_intrinsic_generic_atomic_add:
3906 case ir_intrinsic_generic_atomic_and:
3907 case ir_intrinsic_generic_atomic_or:
3908 case ir_intrinsic_generic_atomic_xor:
3909 case ir_intrinsic_generic_atomic_min:
3910 case ir_intrinsic_generic_atomic_max:
3911 case ir_intrinsic_generic_atomic_exchange:
3912 case ir_intrinsic_generic_atomic_comp_swap:
3913 unreachable("Invalid intrinsic");
3914 }
3915 }
3916
3917 void
3918 glsl_to_tgsi_visitor::calc_deref_offsets(ir_dereference *tail,
3919 unsigned *array_elements,
3920 uint16_t *index,
3921 st_src_reg *indirect,
3922 unsigned *location)
3923 {
3924 switch (tail->ir_type) {
3925 case ir_type_dereference_record: {
3926 ir_dereference_record *deref_record = tail->as_dereference_record();
3927 const glsl_type *struct_type = deref_record->record->type;
3928 int field_index = deref_record->field_idx;
3929
3930 calc_deref_offsets(deref_record->record->as_dereference(), array_elements, index, indirect, location);
3931
3932 assert(field_index >= 0);
3933 *location += struct_type->record_location_offset(field_index);
3934 break;
3935 }
3936
3937 case ir_type_dereference_array: {
3938 ir_dereference_array *deref_arr = tail->as_dereference_array();
3939
3940 void *mem_ctx = ralloc_parent(deref_arr);
3941 ir_constant *array_index =
3942 deref_arr->array_index->constant_expression_value(mem_ctx);
3943
3944 if (!array_index) {
3945 st_src_reg temp_reg;
3946 st_dst_reg temp_dst;
3947
3948 temp_reg = get_temp(glsl_type::uint_type);
3949 temp_dst = st_dst_reg(temp_reg);
3950 temp_dst.writemask = 1;
3951
3952 deref_arr->array_index->accept(this);
3953 if (*array_elements != 1)
3954 emit_asm(NULL, TGSI_OPCODE_MUL, temp_dst, this->result, st_src_reg_for_int(*array_elements));
3955 else
3956 emit_asm(NULL, TGSI_OPCODE_MOV, temp_dst, this->result);
3957
3958 if (indirect->file == PROGRAM_UNDEFINED)
3959 *indirect = temp_reg;
3960 else {
3961 temp_dst = st_dst_reg(*indirect);
3962 temp_dst.writemask = 1;
3963 emit_asm(NULL, TGSI_OPCODE_ADD, temp_dst, *indirect, temp_reg);
3964 }
3965 } else
3966 *index += array_index->value.u[0] * *array_elements;
3967
3968 *array_elements *= deref_arr->array->type->length;
3969
3970 calc_deref_offsets(deref_arr->array->as_dereference(), array_elements, index, indirect, location);
3971 break;
3972 }
3973 default:
3974 break;
3975 }
3976 }
3977
3978 void
3979 glsl_to_tgsi_visitor::get_deref_offsets(ir_dereference *ir,
3980 unsigned *array_size,
3981 unsigned *base,
3982 uint16_t *index,
3983 st_src_reg *reladdr,
3984 bool opaque)
3985 {
3986 GLuint shader = _mesa_program_enum_to_shader_stage(this->prog->Target);
3987 unsigned location = 0;
3988 ir_variable *var = ir->variable_referenced();
3989
3990 memset(reladdr, 0, sizeof(*reladdr));
3991 reladdr->file = PROGRAM_UNDEFINED;
3992
3993 *base = 0;
3994 *array_size = 1;
3995
3996 assert(var);
3997 location = var->data.location;
3998 calc_deref_offsets(ir, array_size, index, reladdr, &location);
3999
4000 /*
4001 * If we end up with no indirect then adjust the base to the index,
4002 * and set the array size to 1.
4003 */
4004 if (reladdr->file == PROGRAM_UNDEFINED) {
4005 *base = *index;
4006 *array_size = 1;
4007 }
4008
4009 if (opaque) {
4010 assert(location != 0xffffffff);
4011 *base += this->shader_program->data->UniformStorage[location].opaque[shader].index;
4012 *index += this->shader_program->data->UniformStorage[location].opaque[shader].index;
4013 }
4014 }
4015
4016 st_src_reg
4017 glsl_to_tgsi_visitor::canonicalize_gather_offset(st_src_reg offset)
4018 {
4019 if (offset.reladdr || offset.reladdr2) {
4020 st_src_reg tmp = get_temp(glsl_type::ivec2_type);
4021 st_dst_reg tmp_dst = st_dst_reg(tmp);
4022 tmp_dst.writemask = WRITEMASK_XY;
4023 emit_asm(NULL, TGSI_OPCODE_MOV, tmp_dst, offset);
4024 return tmp;
4025 }
4026
4027 return offset;
4028 }
4029
/**
 * Translate an ir_texture node into a TGSI texture instruction.
 *
 * The steps are, in order:
 *  - copy the coordinate into a temporary so it can be modified,
 *  - pick the TGSI opcode from ir->op and evaluate the operands that op
 *    needs (LOD/bias, gradients, sample index, gather component, offsets),
 *  - apply the projector, either by switching to TXP or by dividing by hand,
 *  - slot the shadow comparator, sample index or LOD into the coordinate,
 *  - resolve the sampler (bound or bindless) and emit the instruction,
 *  - record sampler, offset, target and type information on the instruction.
 *
 * The value of the texture operation is left in this->result.
 */
void
glsl_to_tgsi_visitor::visit(ir_texture *ir)
{
   st_src_reg result_src, coord, cube_sc, lod_info, projector, dx, dy;
   st_src_reg offset[MAX_GLSL_TEXTURE_OFFSET], sample_index, component;
   st_src_reg levels_src, reladdr;
   st_dst_reg result_dst, coord_dst, cube_sc_dst;
   glsl_to_tgsi_instruction *inst = NULL;
   unsigned opcode = TGSI_OPCODE_NOP;
   const glsl_type *sampler_type = ir->sampler->type;
   unsigned sampler_array_size = 1, sampler_base = 0;
   bool is_cube_array = false, is_cube_shadow = false;
   ir_variable *var = ir->sampler->variable_referenced();
   unsigned i;

   /* Both flags are only ever set for cube samplers: is_cube_array for a
    * cube array sampler, is_cube_shadow for a cube shadow sampler.
    */
   if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) {
      is_cube_array = sampler_type->sampler_array;
      is_cube_shadow = sampler_type->sampler_shadow;
   }

   if (ir->coordinate) {
      ir->coordinate->accept(this);

      /* Put our coords in a temp.  We'll need to modify them for shadow,
       * projection, or LOD, so the only case we'd use it as-is is if
       * we're doing plain old texturing.  The optimization passes on
       * glsl_to_tgsi_visitor should handle cleaning up our mess in that case.
       */
      coord = get_temp(glsl_type::vec4_type);
      coord_dst = st_dst_reg(coord);
      coord_dst.writemask = (1 << ir->coordinate->type->vector_elements) - 1;
      emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
   }

   if (ir->projector) {
      ir->projector->accept(this);
      projector = this->result;
   }

   /* Storage for our result.  Ideally for an assignment we'd be using
    * the actual storage for the result here, instead.
    */
   result_src = get_temp(ir->type);
   result_dst = st_dst_reg(result_src);
   result_dst.writemask = (1 << ir->type->vector_elements) - 1;

   /* Select the TGSI opcode and evaluate the operands specific to it. */
   switch (ir->op) {
   case ir_tex:
      opcode = (is_cube_array && ir->shadow_comparator) ? TGSI_OPCODE_TEX2 : TGSI_OPCODE_TEX;
      if (ir->offset) {
         ir->offset->accept(this);
         offset[0] = this->result;
      }
      break;
   case ir_txb:
      if (is_cube_array || is_cube_shadow) {
         opcode = TGSI_OPCODE_TXB2;
      }
      else {
         opcode = TGSI_OPCODE_TXB;
      }
      ir->lod_info.bias->accept(this);
      lod_info = this->result;
      if (ir->offset) {
         ir->offset->accept(this);
         offset[0] = this->result;
      }
      break;
   case ir_txl:
      /* A constant-zero LOD uses the dedicated _LZ opcode when available,
       * in which case the LOD operand is not evaluated at all.
       */
      if (this->has_tex_txf_lz && ir->lod_info.lod->is_zero()) {
         opcode = TGSI_OPCODE_TEX_LZ;
      } else {
         opcode = is_cube_array ? TGSI_OPCODE_TXL2 : TGSI_OPCODE_TXL;
         ir->lod_info.lod->accept(this);
         lod_info = this->result;
      }
      if (ir->offset) {
         ir->offset->accept(this);
         offset[0] = this->result;
      }
      break;
   case ir_txd:
      opcode = TGSI_OPCODE_TXD;
      ir->lod_info.grad.dPdx->accept(this);
      dx = this->result;
      ir->lod_info.grad.dPdy->accept(this);
      dy = this->result;
      if (ir->offset) {
         ir->offset->accept(this);
         offset[0] = this->result;
      }
      break;
   case ir_txs:
      opcode = TGSI_OPCODE_TXQ;
      ir->lod_info.lod->accept(this);
      lod_info = this->result;
      break;
   case ir_query_levels:
      /* Also a TXQ, but with no LOD operand; the level count is read back
       * from a separate temporary further down.
       */
      opcode = TGSI_OPCODE_TXQ;
      lod_info = undef_src;
      levels_src = get_temp(ir->type);
      break;
   case ir_txf:
      /* Same constant-zero LOD shortcut as ir_txl. */
      if (this->has_tex_txf_lz && ir->lod_info.lod->is_zero()) {
         opcode = TGSI_OPCODE_TXF_LZ;
      } else {
         opcode = TGSI_OPCODE_TXF;
         ir->lod_info.lod->accept(this);
         lod_info = this->result;
      }
      if (ir->offset) {
         ir->offset->accept(this);
         offset[0] = this->result;
      }
      break;
   case ir_txf_ms:
      opcode = TGSI_OPCODE_TXF;
      ir->lod_info.sample_index->accept(this);
      sample_index = this->result;
      break;
   case ir_tg4:
      opcode = TGSI_OPCODE_TG4;
      ir->lod_info.component->accept(this);
      component = this->result;
      if (ir->offset) {
         ir->offset->accept(this);
         if (ir->offset->type->is_array()) {
            /* An array of offsets: derive one operand per element from the
             * evaluated array by stepping the register index, then strip
             * any relative addressing from it.
             */
            const glsl_type *elt_type = ir->offset->type->fields.array;
            for (i = 0; i < ir->offset->type->length; i++) {
               offset[i] = this->result;
               offset[i].index += i * type_size(elt_type);
               offset[i].type = elt_type->base_type;
               offset[i].swizzle = swizzle_for_size(elt_type->vector_elements);
               offset[i] = canonicalize_gather_offset(offset[i]);
            }
         } else {
            offset[0] = canonicalize_gather_offset(this->result);
         }
      }
      break;
   case ir_lod:
      opcode = TGSI_OPCODE_LODQ;
      break;
   case ir_texture_samples:
      opcode = TGSI_OPCODE_TXQS;
      break;
   case ir_samples_identical:
      unreachable("Unexpected ir_samples_identical opcode");
   }

   if (ir->projector) {
      if (opcode == TGSI_OPCODE_TEX) {
         /* Slot the projector in as the last component of the coord. */
         coord_dst.writemask = WRITEMASK_W;
         emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, projector);
         coord_dst.writemask = WRITEMASK_XYZW;
         opcode = TGSI_OPCODE_TXP;
      } else {
         st_src_reg coord_w = coord;
         coord_w.swizzle = SWIZZLE_WWWW;

         /* For the other TEX opcodes there's no projective version
          * since the last slot is taken up by LOD info.  Do the
          * projective divide now.
          */
         coord_dst.writemask = WRITEMASK_W;
         emit_asm(ir, TGSI_OPCODE_RCP, coord_dst, projector);

         /* In the case where we have to project the coordinates "by hand,"
          * the shadow comparator value must also be projected.
          */
         st_src_reg tmp_src = coord;
         if (ir->shadow_comparator) {
            /* Slot the shadow value in as the second to last component of the
             * coord.
             */
            ir->shadow_comparator->accept(this);

            tmp_src = get_temp(glsl_type::vec4_type);
            st_dst_reg tmp_dst = st_dst_reg(tmp_src);

            /* Projective division not allowed for array samplers. */
            assert(!sampler_type->sampler_array);

            tmp_dst.writemask = WRITEMASK_Z;
            emit_asm(ir, TGSI_OPCODE_MOV, tmp_dst, this->result);

            tmp_dst.writemask = WRITEMASK_XY;
            emit_asm(ir, TGSI_OPCODE_MOV, tmp_dst, coord);
         }

         /* coord.xyz = tmp_src.xyz * (1 / projector), using the reciprocal
          * written to coord.w above.
          */
         coord_dst.writemask = WRITEMASK_XYZ;
         emit_asm(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w);

         coord_dst.writemask = WRITEMASK_XYZW;
         coord.swizzle = SWIZZLE_XYZW;
      }
   }

   /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow
    * comparator was put in the correct place (and projected) by the code,
    * above, that handles by-hand projection.
    */
   if (ir->shadow_comparator && (!ir->projector || opcode == TGSI_OPCODE_TXP)) {
      /* Slot the shadow value in as the second to last component of the
       * coord.
       */
      ir->shadow_comparator->accept(this);

      if (is_cube_array) {
         /* Cube arrays pass the comparator in its own register (cube_sc)
          * rather than in a channel of the coordinate.
          */
         cube_sc = get_temp(glsl_type::float_type);
         cube_sc_dst = st_dst_reg(cube_sc);
         cube_sc_dst.writemask = WRITEMASK_X;
         emit_asm(ir, TGSI_OPCODE_MOV, cube_sc_dst, this->result);
         cube_sc_dst.writemask = WRITEMASK_X;
      }
      else {
         /* 2D array and cube samplers take the comparator in W; every other
          * sampler takes it in Z.
          */
         if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D &&
              sampler_type->sampler_array) ||
             sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) {
            coord_dst.writemask = WRITEMASK_W;
         } else {
            coord_dst.writemask = WRITEMASK_Z;
         }
         emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
         coord_dst.writemask = WRITEMASK_XYZW;
      }
   }

   if (ir->op == ir_txf_ms) {
      /* Multisample fetch: the sample index goes in the last channel. */
      coord_dst.writemask = WRITEMASK_W;
      emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, sample_index);
      coord_dst.writemask = WRITEMASK_XYZW;
   } else if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB ||
              opcode == TGSI_OPCODE_TXF) {
      /* TGSI stores LOD or LOD bias in the last channel of the coords. */
      coord_dst.writemask = WRITEMASK_W;
      emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, lod_info);
      coord_dst.writemask = WRITEMASK_XYZW;
   }

   st_src_reg sampler(PROGRAM_SAMPLER, 0, GLSL_TYPE_UINT);

   /* Resolve the sampler dereference.  The opaque uniform index is only
    * added for samplers that are not bindless.
    */
   uint16_t index = 0;
   get_deref_offsets(ir->sampler, &sampler_array_size, &sampler_base,
                     &index, &reladdr, !var->contains_bindless());

   sampler.index = index;
   if (reladdr.file != PROGRAM_UNDEFINED) {
      /* Dynamically indexed sampler: attach the indirect register and emit
       * an address load for it.
       * NOTE(review): sampler_reladdr is declared outside this function;
       * presumably it is the address register dedicated to samplers.
       */
      sampler.reladdr = ralloc(mem_ctx, st_src_reg);
      *sampler.reladdr = reladdr;
      emit_arl(ir, sampler_reladdr, reladdr);
   }

   /* For bindless samplers the evaluated sampler expression itself is used
    * as the instruction's resource.
    */
   st_src_reg bindless;
   if (var->contains_bindless()) {
      ir->sampler->accept(this);
      bindless = this->result;
   }

   /* Emit the instruction; the operand list depends on the opcode. */
   if (opcode == TGSI_OPCODE_TXD)
      inst = emit_asm(ir, opcode, result_dst, coord, dx, dy);
   else if (opcode == TGSI_OPCODE_TXQ) {
      if (ir->op == ir_query_levels) {
         /* the level is stored in W */
         inst = emit_asm(ir, opcode, st_dst_reg(levels_src), lod_info);
         result_dst.writemask = WRITEMASK_X;
         levels_src.swizzle = SWIZZLE_WWWW;
         emit_asm(ir, TGSI_OPCODE_MOV, result_dst, levels_src);
      } else
         inst = emit_asm(ir, opcode, result_dst, lod_info);
   } else if (opcode == TGSI_OPCODE_TXQS) {
      inst = emit_asm(ir, opcode, result_dst);
   } else if (opcode == TGSI_OPCODE_TXL2 || opcode == TGSI_OPCODE_TXB2) {
      inst = emit_asm(ir, opcode, result_dst, coord, lod_info);
   } else if (opcode == TGSI_OPCODE_TEX2) {
      inst = emit_asm(ir, opcode, result_dst, coord, cube_sc);
   } else if (opcode == TGSI_OPCODE_TG4) {
      if (is_cube_array && ir->shadow_comparator) {
         inst = emit_asm(ir, opcode, result_dst, coord, cube_sc);
      } else {
         inst = emit_asm(ir, opcode, result_dst, coord, component);
      }
   } else
      inst = emit_asm(ir, opcode, result_dst, coord);

   if (ir->shadow_comparator)
      inst->tex_shadow = GL_TRUE;

   if (var->contains_bindless()) {
      /* The bindless resource is read with an XYXY swizzle. */
      inst->resource = bindless;
      inst->resource.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
                                             SWIZZLE_X, SWIZZLE_Y);
   } else {
      inst->resource = sampler;
      inst->sampler_array_size = sampler_array_size;
      inst->sampler_base = sampler_base;
   }

   if (ir->offset) {
      if (!inst->tex_offsets)
         inst->tex_offsets = rzalloc_array(inst, st_src_reg, MAX_GLSL_TEXTURE_OFFSET);

      /* Copy offsets up to the first entry whose file is PROGRAM_UNDEFINED.
       * NOTE(review): this relies on entries of offset[] that were never
       * assigned having file PROGRAM_UNDEFINED -- confirm against the
       * st_src_reg default constructor.
       */
      for (i = 0; i < MAX_GLSL_TEXTURE_OFFSET && offset[i].file != PROGRAM_UNDEFINED; i++)
         inst->tex_offsets[i] = offset[i];
      inst->tex_offset_num_offset = i;
   }

   inst->tex_target = sampler_type->sampler_index();
   inst->tex_type = ir->type->base_type;

   this->result = result_src;
}
4344
4345 void
4346 glsl_to_tgsi_visitor::visit(ir_return *ir)
4347 {
4348 assert(!ir->get_value());
4349
4350 emit_asm(ir, TGSI_OPCODE_RET);
4351 }
4352
4353 void
4354 glsl_to_tgsi_visitor::visit(ir_discard *ir)
4355 {
4356 if (ir->condition) {
4357 ir->condition->accept(this);
4358 st_src_reg condition = this->result;
4359
4360 /* Convert the bool condition to a float so we can negate. */
4361 if (native_integers) {
4362 st_src_reg temp = get_temp(ir->condition->type);
4363 emit_asm(ir, TGSI_OPCODE_AND, st_dst_reg(temp),
4364 condition, st_src_reg_for_float(1.0));
4365 condition = temp;
4366 }
4367
4368 condition.negate = ~condition.negate;
4369 emit_asm(ir, TGSI_OPCODE_KILL_IF, undef_dst, condition);
4370 } else {
4371 /* unconditional kil */
4372 emit_asm(ir, TGSI_OPCODE_KILL);
4373 }
4374 }
4375
4376 void
4377 glsl_to_tgsi_visitor::visit(ir_if *ir)
4378 {
4379 unsigned if_opcode;
4380 glsl_to_tgsi_instruction *if_inst;
4381
4382 ir->condition->accept(this);
4383 assert(this->result.file != PROGRAM_UNDEFINED);
4384
4385 if_opcode = native_integers ? TGSI_OPCODE_UIF : TGSI_OPCODE_IF;
4386
4387 if_inst = emit_asm(ir->condition, if_opcode, undef_dst, this->result);
4388
4389 this->instructions.push_tail(if_inst);
4390
4391 visit_exec_list(&ir->then_instructions, this);
4392
4393 if (!ir->else_instructions.is_empty()) {
4394 emit_asm(ir->condition, TGSI_OPCODE_ELSE);
4395 visit_exec_list(&ir->else_instructions, this);
4396 }
4397
4398 if_inst = emit_asm(ir->condition, TGSI_OPCODE_ENDIF);
4399 }
4400
4401
4402 void
4403 glsl_to_tgsi_visitor::visit(ir_emit_vertex *ir)
4404 {
4405 assert(this->prog->Target == GL_GEOMETRY_PROGRAM_NV);
4406
4407 ir->stream->accept(this);
4408 emit_asm(ir, TGSI_OPCODE_EMIT, undef_dst, this->result);
4409 }
4410
4411 void
4412 glsl_to_tgsi_visitor::visit(ir_end_primitive *ir)
4413 {
4414 assert(this->prog->Target == GL_GEOMETRY_PROGRAM_NV);
4415
4416 ir->stream->accept(this);
4417 emit_asm(ir, TGSI_OPCODE_ENDPRIM, undef_dst, this->result);
4418 }
4419
4420 void
4421 glsl_to_tgsi_visitor::visit(ir_barrier *ir)
4422 {
4423 assert(this->prog->Target == GL_TESS_CONTROL_PROGRAM_NV ||
4424 this->prog->Target == GL_COMPUTE_PROGRAM_NV);
4425
4426 emit_asm(ir, TGSI_OPCODE_BARRIER);
4427 }
4428
4429 glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
4430 {
4431 STATIC_ASSERT(sizeof(samplers_used) * 8 >= PIPE_MAX_SAMPLERS);
4432
4433 result.file = PROGRAM_UNDEFINED;
4434 next_temp = 1;
4435 array_sizes = NULL;
4436 max_num_arrays = 0;
4437 next_array = 0;
4438 num_inputs = 0;
4439 num_outputs = 0;
4440 num_input_arrays = 0;
4441 num_output_arrays = 0;
4442 num_atomics = 0;
4443 num_atomic_arrays = 0;
4444 num_immediates = 0;
4445 num_address_regs = 0;
4446 samplers_used = 0;
4447 images_used = 0;
4448 indirect_addr_consts = false;
4449 wpos_transform_const = -1;
4450 native_integers = false;
4451 mem_ctx = ralloc_context(NULL);
4452 ctx = NULL;
4453 prog = NULL;
4454 precise = 0;
4455 shader_program = NULL;
4456 shader = NULL;
4457 options = NULL;
4458 have_sqrt = false;
4459 have_fma = false;
4460 use_shared_memory = false;
4461 has_tex_txf_lz = false;
4462 variables = NULL;
4463 }
4464
4465 static void var_destroy(struct hash_entry *entry)
4466 {
4467 variable_storage *storage = (variable_storage *)entry->data;
4468
4469 delete storage;
4470 }
4471
4472 glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
4473 {
4474 _mesa_hash_table_destroy(variables, var_destroy);
4475 free(array_sizes);
4476 ralloc_free(mem_ctx);
4477 }
4478
/**
 * C-linkage wrapper that deletes a glsl_to_tgsi_visitor, running its
 * destructor.
 */
extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v)
{
   delete v;
}
4483
4484
4485 /**
4486 * Count resources used by the given gpu program (number of texture
4487 * samplers, etc).
4488 */
4489 static void
4490 count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
4491 {
4492 v->samplers_used = 0;
4493 v->images_used = 0;
4494 prog->info.textures_used_by_txf = 0;
4495
4496 foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) {
4497 if (inst->info->is_tex) {
4498 for (int i = 0; i < inst->sampler_array_size; i++) {
4499 unsigned idx = inst->sampler_base + i;
4500 v->samplers_used |= 1u << idx;
4501
4502 debug_assert(idx < (int)ARRAY_SIZE(v->sampler_types));
4503 v->sampler_types[idx] = inst->tex_type;
4504 v->sampler_targets[idx] =
4505 st_translate_texture_target(inst->tex_target, inst->tex_shadow);
4506
4507 if (inst->op == TGSI_OPCODE_TXF || inst->op == TGSI_OPCODE_TXF_LZ) {
4508 prog->info.textures_used_by_txf |= 1u << idx;
4509 }
4510 }
4511 }
4512
4513 if (inst->tex_target == TEXTURE_EXTERNAL_INDEX)
4514 prog->ExternalSamplersUsed |= 1 << inst->resource.index;
4515
4516 if (inst->resource.file != PROGRAM_UNDEFINED && (
4517 is_resource_instruction(inst->op) ||
4518 inst->op == TGSI_OPCODE_STORE)) {
4519 if (inst->resource.file == PROGRAM_MEMORY) {
4520 v->use_shared_memory = true;
4521 } else if (inst->resource.file == PROGRAM_IMAGE) {
4522 for (int i = 0; i < inst->sampler_array_size; i++) {
4523 unsigned idx = inst->sampler_base + i;
4524 v->images_used |= 1 << idx;
4525 v->image_targets[idx] =
4526 st_translate_texture_target(inst->tex_target, false);
4527 v->image_formats[idx] = inst->image_format;
4528 }
4529 }
4530 }
4531 }
4532 prog->SamplersUsed = v->samplers_used;
4533
4534 if (v->shader_program != NULL)
4535 _mesa_update_shader_textures_used(v->shader_program, prog);
4536 }
4537
4538 /**
4539 * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
4540 * are read from the given src in this instruction
4541 */
4542 static int
4543 get_src_arg_mask(st_dst_reg dst, st_src_reg src)
4544 {
4545 int read_mask = 0, comp;
4546
4547 /* Now, given the src swizzle and the written channels, find which
4548 * components are actually read
4549 */
4550 for (comp = 0; comp < 4; ++comp) {
4551 const unsigned coord = GET_SWZ(src.swizzle, comp);
4552 assert(coord < 4);
4553 if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W)
4554 read_mask |= 1 << coord;
4555 }
4556
4557 return read_mask;
4558 }
4559
4560 /**
4561 * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
4562 * instruction is the first instruction to write to register T0. There are
4563 * several lowering passes done in GLSL IR (e.g. branches and
4564 * relative addressing) that create a large number of conditional assignments
4565 * that ir_to_mesa converts to CMP instructions like the one mentioned above.
4566 *
4567 * Here is why this conversion is safe:
4568 * CMP T0, T1 T2 T0 can be expanded to:
4569 * if (T1 < 0.0)
4570 * MOV T0, T2;
4571 * else
4572 * MOV T0, T0;
4573 *
4574 * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
4575 * as the original program. If (T1 < 0.0) evaluates to false, executing
4576 * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
4577 * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
4578 * because any instruction that was going to read from T0 after this was going
4579 * to read a garbage value anyway.
4580 */
4581 void
4582 glsl_to_tgsi_visitor::simplify_cmp(void)
4583 {
4584 int tempWritesSize = 0;
4585 unsigned *tempWrites = NULL;
4586 unsigned outputWrites[VARYING_SLOT_TESS_MAX];
4587
4588 memset(outputWrites, 0, sizeof(outputWrites));
4589
4590 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
4591 unsigned prevWriteMask = 0;
4592
4593 /* Give up if we encounter relative addressing or flow control. */
4594 if (inst->dst[0].reladdr || inst->dst[0].reladdr2 ||
4595 inst->dst[1].reladdr || inst->dst[1].reladdr2 ||
4596 inst->info->is_branch ||
4597 inst->op == TGSI_OPCODE_CONT ||
4598 inst->op == TGSI_OPCODE_END ||
4599 inst->op == TGSI_OPCODE_RET) {
4600 break;
4601 }
4602
4603 if (inst->dst[0].file == PROGRAM_OUTPUT) {
4604 assert(inst->dst[0].index < (signed)ARRAY_SIZE(outputWrites));
4605 prevWriteMask = outputWrites[inst->dst[0].index];
4606 outputWrites[inst->dst[0].index] |= inst->dst[0].writemask;
4607 } else if (inst->dst[0].file == PROGRAM_TEMPORARY) {
4608 if (inst->dst[0].index >= tempWritesSize) {
4609 const int inc = 4096;
4610
4611 tempWrites = (unsigned*)
4612 realloc(tempWrites,
4613 (tempWritesSize + inc) * sizeof(unsigned));
4614 if (!tempWrites)
4615 return;
4616
4617 memset(tempWrites + tempWritesSize, 0, inc * sizeof(unsigned));
4618 tempWritesSize += inc;
4619 }
4620
4621 prevWriteMask = tempWrites[inst->dst[0].index];
4622 tempWrites[inst->dst[0].index] |= inst->dst[0].writemask;
4623 } else
4624 continue;
4625
4626 /* For a CMP to be considered a conditional write, the destination
4627 * register and source register two must be the same. */
4628 if (inst->op == TGSI_OPCODE_CMP
4629 && !(inst->dst[0].writemask & prevWriteMask)
4630 && inst->src[2].file == inst->dst[0].file
4631 && inst->src[2].index == inst->dst[0].index
4632 && inst->dst[0].writemask == get_src_arg_mask(inst->dst[0], inst->src[2])) {
4633
4634 inst->op = TGSI_OPCODE_MOV;
4635 inst->info = tgsi_get_opcode_info(inst->op);
4636 inst->src[0] = inst->src[1];
4637 }
4638 }
4639
4640 free(tempWrites);
4641 }
4642
4643 static void
4644 rename_temp_handle_src(struct rename_reg_pair *renames, st_src_reg *src)
4645 {
4646 if (src && src->file == PROGRAM_TEMPORARY) {
4647 int old_idx = src->index;
4648 if (renames[old_idx].valid)
4649 src->index = renames[old_idx].new_reg;
4650 }
4651 }
4652
4653 /* Replaces all references to a temporary register index with another index. */
4654 void
4655 glsl_to_tgsi_visitor::rename_temp_registers(struct rename_reg_pair *renames)
4656 {
4657 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
4658 unsigned j;
4659 for (j = 0; j < num_inst_src_regs(inst); j++) {
4660 rename_temp_handle_src(renames, &inst->src[j]);
4661 rename_temp_handle_src(renames, inst->src[j].reladdr);
4662 rename_temp_handle_src(renames, inst->src[j].reladdr2);
4663 }
4664
4665 for (j = 0; j < inst->tex_offset_num_offset; j++) {
4666 rename_temp_handle_src(renames, &inst->tex_offsets[j]);
4667 rename_temp_handle_src(renames, inst->tex_offsets[j].reladdr);
4668 rename_temp_handle_src(renames, inst->tex_offsets[j].reladdr2);
4669 }
4670
4671 rename_temp_handle_src(renames, &inst->resource);
4672 rename_temp_handle_src(renames, inst->resource.reladdr);
4673 rename_temp_handle_src(renames, inst->resource.reladdr2);
4674
4675 for (j = 0; j < num_inst_dst_regs(inst); j++) {
4676 if (inst->dst[j].file == PROGRAM_TEMPORARY) {
4677 int old_idx = inst->dst[j].index;
4678 if (renames[old_idx].valid)
4679 inst->dst[j].index = renames[old_idx].new_reg;
4680 }
4681 rename_temp_handle_src(renames, inst->dst[j].reladdr);
4682 rename_temp_handle_src(renames, inst->dst[j].reladdr2);
4683 }
4684 }
4685 }
4686
4687 void
4688 glsl_to_tgsi_visitor::get_first_temp_write(int *first_writes)
4689 {
4690 int depth = 0; /* loop depth */
4691 int loop_start = -1; /* index of the first active BGNLOOP (if any) */
4692 unsigned i = 0, j;
4693
4694 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
4695 for (j = 0; j < num_inst_dst_regs(inst); j++) {
4696 if (inst->dst[j].file == PROGRAM_TEMPORARY) {
4697 if (first_writes[inst->dst[j].index] == -1)
4698 first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start;
4699 }
4700 }
4701
4702 if (inst->op == TGSI_OPCODE_BGNLOOP) {
4703 if(depth++ == 0)
4704 loop_start = i;
4705 } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
4706 if (--depth == 0)
4707 loop_start = -1;
4708 }
4709 assert(depth >= 0);
4710 i++;
4711 }
4712 }
4713
4714 void
4715 glsl_to_tgsi_visitor::get_first_temp_read(int *first_reads)
4716 {
4717 int depth = 0; /* loop depth */
4718 int loop_start = -1; /* index of the first active BGNLOOP (if any) */
4719 unsigned i = 0, j;
4720
4721 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
4722 for (j = 0; j < num_inst_src_regs(inst); j++) {
4723 if (inst->src[j].file == PROGRAM_TEMPORARY) {
4724 if (first_reads[inst->src[j].index] == -1)
4725 first_reads[inst->src[j].index] = (depth == 0) ? i : loop_start;
4726 }
4727 }
4728 for (j = 0; j < inst->tex_offset_num_offset; j++) {
4729 if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) {
4730 if (first_reads[inst->tex_offsets[j].index] == -1)
4731 first_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : loop_start;
4732 }
4733 }
4734 if (inst->op == TGSI_OPCODE_BGNLOOP) {
4735 if(depth++ == 0)
4736 loop_start = i;
4737 } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
4738 if (--depth == 0)
4739 loop_start = -1;
4740 }
4741 assert(depth >= 0);
4742 i++;
4743 }
4744 }
4745
4746 void
4747 glsl_to_tgsi_visitor::get_last_temp_read_first_temp_write(int *last_reads, int *first_writes)
4748 {
4749 int depth = 0; /* loop depth */
4750 int loop_start = -1; /* index of the first active BGNLOOP (if any) */
4751 unsigned i = 0, j;
4752 int k;
4753 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
4754 for (j = 0; j < num_inst_src_regs(inst); j++) {
4755 if (inst->src[j].file == PROGRAM_TEMPORARY)
4756 last_reads[inst->src[j].index] = (depth == 0) ? i : -2;
4757 }
4758 for (j = 0; j < num_inst_dst_regs(inst); j++) {
4759 if (inst->dst[j].file == PROGRAM_TEMPORARY) {
4760 if (first_writes[inst->dst[j].index] == -1)
4761 first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start;
4762 last_reads[inst->dst[j].index] = (depth == 0) ? i : -2;
4763 }
4764 }
4765 for (j = 0; j < inst->tex_offset_num_offset; j++) {
4766 if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY)
4767 last_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : -2;
4768 }
4769 if (inst->op == TGSI_OPCODE_BGNLOOP) {
4770 if(depth++ == 0)
4771 loop_start = i;
4772 } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
4773 if (--depth == 0) {
4774 loop_start = -1;
4775 for (k = 0; k < this->next_temp; k++) {
4776 if (last_reads[k] == -2) {
4777 last_reads[k] = i;
4778 }
4779 }
4780 }
4781 }
4782 assert(depth >= 0);
4783 i++;
4784 }
4785 }
4786
4787 void
4788 glsl_to_tgsi_visitor::get_last_temp_write(int *last_writes)
4789 {
4790 int depth = 0; /* loop depth */
4791 int i = 0, k;
4792 unsigned j;
4793
4794 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
4795 for (j = 0; j < num_inst_dst_regs(inst); j++) {
4796 if (inst->dst[j].file == PROGRAM_TEMPORARY)
4797 last_writes[inst->dst[j].index] = (depth == 0) ? i : -2;
4798 }
4799
4800 if (inst->op == TGSI_OPCODE_BGNLOOP)
4801 depth++;
4802 else if (inst->op == TGSI_OPCODE_ENDLOOP)
4803 if (--depth == 0) {
4804 for (k = 0; k < this->next_temp; k++) {
4805 if (last_writes[k] == -2) {
4806 last_writes[k] = i;
4807 }
4808 }
4809 }
4810 assert(depth >= 0);
4811 i++;
4812 }
4813 }
4814
/*
 * On a basic block basis, tracks available PROGRAM_TEMPORARY register
 * channels for copy propagation and updates following instructions to
 * use the original versions.
 *
 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
 * will occur.  As an example, a TXP production before this pass:
 *
 * 0: MOV TEMP[1], INPUT[4].xyyy;
 * 1: MOV TEMP[1].w, INPUT[4].wwww;
 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
 *
 * and after:
 *
 * 0: MOV TEMP[1], INPUT[4].xyyy;
 * 1: MOV TEMP[1].w, INPUT[4].wwww;
 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
 *
 * which allows for dead code elimination on TEMP[1]'s writes.
 *
 * Bookkeeping: acp ("available copies") holds one entry per temporary
 * channel, indexed by 4 * temp_index + channel.  An entry points at the MOV
 * that last copied into that channel, or is NULL when no copy is available.
 * acp_level stores the IF nesting level at which the entry was recorded.
 */
void
glsl_to_tgsi_visitor::copy_propagate(void)
{
   glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
                                                  glsl_to_tgsi_instruction *,
                                                  this->next_temp * 4);
   int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
   int level = 0;

   foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
      assert(inst->dst[0].file != PROGRAM_TEMPORARY
             || inst->dst[0].index < this->next_temp);

      /* First, do any copy propagation possible into the src regs.
       * Only the first three source operands are considered.
       */
      for (int r = 0; r < 3; r++) {
         glsl_to_tgsi_instruction *first = NULL;
         bool good = true;
         int acp_base = inst->src[r].index * 4;

         if (inst->src[r].file != PROGRAM_TEMPORARY ||
             inst->src[r].reladdr ||
             inst->src[r].reladdr2)
            continue;

         /* See if we can find entries in the ACP consisting of MOVs
          * from the same src register for all the swizzled channels
          * of this src register reference.
          */
         for (int i = 0; i < 4; i++) {
            int src_chan = GET_SWZ(inst->src[r].swizzle, i);
            glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];

            if (!copy_chan) {
               good = false;
               break;
            }

            assert(acp_level[acp_base + src_chan] <= level);

            if (!first) {
               first = copy_chan;
            } else {
               /* Every channel must have been copied from the same
                * register as the first one.
                */
               if (first->src[0].file != copy_chan->src[0].file ||
                   first->src[0].index != copy_chan->src[0].index ||
                   first->src[0].double_reg2 != copy_chan->src[0].double_reg2 ||
                   first->src[0].index2D != copy_chan->src[0].index2D) {
                  good = false;
                  break;
               }
            }
         }

         if (good) {
            /* We've now validated that we can copy-propagate to
             * replace this src register reference.  Do it.
             */
            inst->src[r].file = first->src[0].file;
            inst->src[r].index = first->src[0].index;
            inst->src[r].index2D = first->src[0].index2D;
            inst->src[r].has_index2 = first->src[0].has_index2;
            inst->src[r].double_reg2 = first->src[0].double_reg2;
            inst->src[r].array_id = first->src[0].array_id;

            /* Compose the swizzles: for each channel, look up what the
             * recorded MOV read for the component this operand selected.
             * acp_base was computed before the index was overwritten above.
             */
            int swizzle = 0;
            for (int i = 0; i < 4; i++) {
               int src_chan = GET_SWZ(inst->src[r].swizzle, i);
               glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan];
               swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) << (3 * i));
            }
            inst->src[r].swizzle = swizzle;
         }
      }

      /* Then update the ACP for the effect of this instruction. */
      switch (inst->op) {
      case TGSI_OPCODE_BGNLOOP:
      case TGSI_OPCODE_ENDLOOP:
         /* End of a basic block, clear the ACP entirely. */
         memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
         break;

      case TGSI_OPCODE_IF:
      case TGSI_OPCODE_UIF:
         ++level;
         break;

      case TGSI_OPCODE_ENDIF:
      case TGSI_OPCODE_ELSE:
         /* Clear all channels written inside the block from the ACP, but
          * leaving those that were not touched.
          */
         for (int r = 0; r < this->next_temp; r++) {
            for (int c = 0; c < 4; c++) {
               if (!acp[4 * r + c])
                  continue;

               if (acp_level[4 * r + c] >= level)
                  acp[4 * r + c] = NULL;
            }
         }
         /* ELSE stays at the same nesting level; only ENDIF leaves it. */
         if (inst->op == TGSI_OPCODE_ENDIF)
            --level;
         break;

      default:
         /* Continuing the block, clear any written channels from
          * the ACP.  Both destination operands are examined.
          */
         for (int d = 0; d < 2; d++) {
            if (inst->dst[d].file == PROGRAM_TEMPORARY && inst->dst[d].reladdr) {
               /* Any temporary might be written, so no copy propagation
                * across this instruction.
                */
               memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
            } else if (inst->dst[d].file == PROGRAM_OUTPUT &&
                       inst->dst[d].reladdr) {
               /* Any output might be written, so no copy propagation
                * from outputs across this instruction.
                */
               for (int r = 0; r < this->next_temp; r++) {
                  for (int c = 0; c < 4; c++) {
                     if (!acp[4 * r + c])
                        continue;

                     if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
                        acp[4 * r + c] = NULL;
                  }
               }
            } else if (inst->dst[d].file == PROGRAM_TEMPORARY ||
                       inst->dst[d].file == PROGRAM_OUTPUT) {
               /* Clear where it's used as dst. */
               if (inst->dst[d].file == PROGRAM_TEMPORARY) {
                  for (int c = 0; c < 4; c++) {
                     if (inst->dst[d].writemask & (1 << c))
                        acp[4 * inst->dst[d].index + c] = NULL;
                  }
               }

               /* Clear where it's used as src. */
               for (int r = 0; r < this->next_temp; r++) {
                  for (int c = 0; c < 4; c++) {
                     if (!acp[4 * r + c])
                        continue;

                     int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);

                     if (acp[4 * r + c]->src[0].file == inst->dst[d].file &&
                         acp[4 * r + c]->src[0].index == inst->dst[d].index &&
                         inst->dst[d].writemask & (1 << src_chan)) {
                        acp[4 * r + c] = NULL;
                     }
                  }
               }
            }
         }
         break;
      }

      /* If this is a copy, add it to the ACP.  Only a plain MOV into a
       * directly addressed temporary qualifies: no self-copy, no saturate,
       * no array source, no relatively addressed source, no negate/abs
       * modifier, and no output source in a tessellation control shader.
       */
      if (inst->op == TGSI_OPCODE_MOV &&
          inst->dst[0].file == PROGRAM_TEMPORARY &&
          !(inst->dst[0].file == inst->src[0].file &&
            inst->dst[0].index == inst->src[0].index) &&
          !inst->dst[0].reladdr &&
          !inst->dst[0].reladdr2 &&
          !inst->saturate &&
          inst->src[0].file != PROGRAM_ARRAY &&
          (inst->src[0].file != PROGRAM_OUTPUT ||
           this->shader->Stage != MESA_SHADER_TESS_CTRL) &&
          !inst->src[0].reladdr &&
          !inst->src[0].reladdr2 &&
          !inst->src[0].negate &&
          !inst->src[0].abs) {
         for (int i = 0; i < 4; i++) {
            if (inst->dst[0].writemask & (1 << i)) {
               acp[4 * inst->dst[0].index + i] = inst;
               acp_level[4 * inst->dst[0].index + i] = level;
            }
         }
      }
   }

   ralloc_free(acp_level);
   ralloc_free(acp);
}
5019
5020 static void
5021 dead_code_handle_reladdr(glsl_to_tgsi_instruction **writes, st_src_reg *reladdr)
5022 {
5023 if (reladdr && reladdr->file == PROGRAM_TEMPORARY) {
5024 /* Clear where it's used as src. */
5025 int swz = GET_SWZ(reladdr->swizzle, 0);
5026 writes[4 * reladdr->index + swz] = NULL;
5027 }
5028 }
5029
5030 /*
5031 * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead
5032 * code elimination.
5033 *
5034 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
5035 * will occur. As an example, a TXP production after copy propagation but
5036 * before this pass:
5037 *
5038 * 0: MOV TEMP[1], INPUT[4].xyyy;
5039 * 1: MOV TEMP[1].w, INPUT[4].wwww;
5040 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
5041 *
5042 * and after this pass:
5043 *
5044 * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
5045 */
5046 int
5047 glsl_to_tgsi_visitor::eliminate_dead_code(void)
5048 {
5049 glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
5050 glsl_to_tgsi_instruction *,
5051 this->next_temp * 4);
5052 int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
5053 int level = 0;
5054 int removed = 0;
5055
5056 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
5057 assert(inst->dst[0].file != PROGRAM_TEMPORARY
5058 || inst->dst[0].index < this->next_temp);
5059
5060 switch (inst->op) {
5061 case TGSI_OPCODE_BGNLOOP:
5062 case TGSI_OPCODE_ENDLOOP:
5063 case TGSI_OPCODE_CONT:
5064 case TGSI_OPCODE_BRK:
5065 /* End of a basic block, clear the write array entirely.
5066 *
5067 * This keeps us from killing dead code when the writes are
5068 * on either side of a loop, even when the register isn't touched
5069 * inside the loop. However, glsl_to_tgsi_visitor doesn't seem to emit
5070 * dead code of this type, so it shouldn't make a difference as long as
5071 * the dead code elimination pass in the GLSL compiler does its job.
5072 */
5073 memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
5074 break;
5075
5076 case TGSI_OPCODE_ENDIF:
5077 case TGSI_OPCODE_ELSE:
5078 /* Promote the recorded level of all channels written inside the
5079 * preceding if or else block to the level above the if/else block.
5080 */
5081 for (int r = 0; r < this->next_temp; r++) {
5082 for (int c = 0; c < 4; c++) {
5083 if (!writes[4 * r + c])
5084 continue;
5085
5086 if (write_level[4 * r + c] == level)
5087 write_level[4 * r + c] = level-1;
5088 }
5089 }
5090 if(inst->op == TGSI_OPCODE_ENDIF)
5091 --level;
5092 break;
5093
5094 case TGSI_OPCODE_IF:
5095 case TGSI_OPCODE_UIF:
5096 ++level;
5097 /* fallthrough to default case to mark the condition as read */
5098 default:
5099 /* Continuing the block, clear any channels from the write array that
5100 * are read by this instruction.
5101 */
5102 for (unsigned i = 0; i < ARRAY_SIZE(inst->src); i++) {
5103 if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){
5104 /* Any temporary might be read, so no dead code elimination
5105 * across this instruction.
5106 */
5107 memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
5108 } else if (inst->src[i].file == PROGRAM_TEMPORARY) {
5109 /* Clear where it's used as src. */
5110 int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0);
5111 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1);
5112 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2);
5113 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3);
5114
5115 for (int c = 0; c < 4; c++) {
5116 if (src_chans & (1 << c))
5117 writes[4 * inst->src[i].index + c] = NULL;
5118 }
5119 }
5120 dead_code_handle_reladdr(writes, inst->src[i].reladdr);
5121 dead_code_handle_reladdr(writes, inst->src[i].reladdr2);
5122 }
5123 for (unsigned i = 0; i < inst->tex_offset_num_offset; i++) {
5124 if (inst->tex_offsets[i].file == PROGRAM_TEMPORARY && inst->tex_offsets[i].reladdr){
5125 /* Any temporary might be read, so no dead code elimination
5126 * across this instruction.
5127 */
5128 memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
5129 } else if (inst->tex_offsets[i].file == PROGRAM_TEMPORARY) {
5130 /* Clear where it's used as src. */
5131 int src_chans = 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 0);
5132 src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 1);
5133 src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 2);
5134 src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 3);
5135
5136 for (int c = 0; c < 4; c++) {
5137 if (src_chans & (1 << c))
5138 writes[4 * inst->tex_offsets[i].index + c] = NULL;
5139 }
5140 }
5141 dead_code_handle_reladdr(writes, inst->tex_offsets[i].reladdr);
5142 dead_code_handle_reladdr(writes, inst->tex_offsets[i].reladdr2);
5143 }
5144
5145 if (inst->resource.file == PROGRAM_TEMPORARY) {
5146 int src_chans;
5147
5148 src_chans = 1 << GET_SWZ(inst->resource.swizzle, 0);
5149 src_chans |= 1 << GET_SWZ(inst->resource.swizzle, 1);
5150 src_chans |= 1 << GET_SWZ(inst->resource.swizzle, 2);
5151 src_chans |= 1 << GET_SWZ(inst->resource.swizzle, 3);
5152
5153 for (int c = 0; c < 4; c++) {
5154 if (src_chans & (1 << c))
5155 writes[4 * inst->resource.index + c] = NULL;
5156 }
5157 }
5158 dead_code_handle_reladdr(writes, inst->resource.reladdr);
5159 dead_code_handle_reladdr(writes, inst->resource.reladdr2);
5160
5161 for (unsigned i = 0; i < ARRAY_SIZE(inst->dst); i++) {
5162 dead_code_handle_reladdr(writes, inst->dst[i].reladdr);
5163 dead_code_handle_reladdr(writes, inst->dst[i].reladdr2);
5164 }
5165 break;
5166 }
5167
5168 /* If this instruction writes to a temporary, add it to the write array.
5169 * If there is already an instruction in the write array for one or more
5170 * of the channels, flag that channel write as dead.
5171 */
5172 for (unsigned i = 0; i < ARRAY_SIZE(inst->dst); i++) {
5173 if (inst->dst[i].file == PROGRAM_TEMPORARY &&
5174 !inst->dst[i].reladdr) {
5175 for (int c = 0; c < 4; c++) {
5176 if (inst->dst[i].writemask & (1 << c)) {
5177 if (writes[4 * inst->dst[i].index + c]) {
5178 if (write_level[4 * inst->dst[i].index + c] < level)
5179 continue;
5180 else
5181 writes[4 * inst->dst[i].index + c]->dead_mask |= (1 << c);
5182 }
5183 writes[4 * inst->dst[i].index + c] = inst;
5184 write_level[4 * inst->dst[i].index + c] = level;
5185 }
5186 }
5187 }
5188 }
5189 }
5190
5191 /* Anything still in the write array at this point is dead code. */
5192 for (int r = 0; r < this->next_temp; r++) {
5193 for (int c = 0; c < 4; c++) {
5194 glsl_to_tgsi_instruction *inst = writes[4 * r + c];
5195 if (inst)
5196 inst->dead_mask |= (1 << c);
5197 }
5198 }
5199
5200 /* Now actually remove the instructions that are completely dead and update
5201 * the writemask of other instructions with dead channels.
5202 */
5203 foreach_in_list_safe(glsl_to_tgsi_instruction, inst, &this->instructions) {
5204 if (!inst->dead_mask || !inst->dst[0].writemask)
5205 continue;
5206 /* No amount of dead masks should remove memory stores */
5207 if (inst->info->is_store)
5208 continue;
5209
5210 if ((inst->dst[0].writemask & ~inst->dead_mask) == 0) {
5211 inst->remove();
5212 delete inst;
5213 removed++;
5214 } else {
5215 if (glsl_base_type_is_64bit(inst->dst[0].type)) {
5216 if (inst->dead_mask == WRITEMASK_XY ||
5217 inst->dead_mask == WRITEMASK_ZW)
5218 inst->dst[0].writemask &= ~(inst->dead_mask);
5219 } else
5220 inst->dst[0].writemask &= ~(inst->dead_mask);
5221 }
5222 }
5223
5224 ralloc_free(write_level);
5225 ralloc_free(writes);
5226
5227 return removed;
5228 }
5229
5230 /* merge DFRACEXP instructions into one. */
5231 void
5232 glsl_to_tgsi_visitor::merge_two_dsts(void)
5233 {
5234 /* We never delete inst, but we may delete its successor. */
5235 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
5236 glsl_to_tgsi_instruction *inst2;
5237 unsigned defined;
5238
5239 if (num_inst_dst_regs(inst) != 2)
5240 continue;
5241
5242 if (inst->dst[0].file != PROGRAM_UNDEFINED &&
5243 inst->dst[1].file != PROGRAM_UNDEFINED)
5244 continue;
5245
5246 assert(inst->dst[0].file != PROGRAM_UNDEFINED ||
5247 inst->dst[1].file != PROGRAM_UNDEFINED);
5248
5249 if (inst->dst[0].file == PROGRAM_UNDEFINED)
5250 defined = 1;
5251 else
5252 defined = 0;
5253
5254 inst2 = (glsl_to_tgsi_instruction *) inst->next;
5255 while (!inst2->is_tail_sentinel()) {
5256 if (inst->op == inst2->op &&
5257 inst2->dst[defined].file == PROGRAM_UNDEFINED &&
5258 inst->src[0].file == inst2->src[0].file &&
5259 inst->src[0].index == inst2->src[0].index &&
5260 inst->src[0].type == inst2->src[0].type &&
5261 inst->src[0].swizzle == inst2->src[0].swizzle)
5262 break;
5263 inst2 = (glsl_to_tgsi_instruction *) inst2->next;
5264 }
5265
5266 if (inst2->is_tail_sentinel()) {
5267 /* Undefined destinations are not allowed, substitute with an unused
5268 * temporary register.
5269 */
5270 st_src_reg tmp = get_temp(glsl_type::vec4_type);
5271 inst->dst[defined ^ 1] = st_dst_reg(tmp);
5272 inst->dst[defined ^ 1].writemask = 0;
5273 continue;
5274 }
5275
5276 inst->dst[defined ^ 1] = inst2->dst[defined ^ 1];
5277 inst2->remove();
5278 delete inst2;
5279 }
5280 }
5281
5282 /* Merges temporary registers together where possible to reduce the number of
5283 * registers needed to run a program.
5284 *
5285 * Produces optimal code only after copy propagation and dead code elimination
5286 * have been run. */
5287 void
5288 glsl_to_tgsi_visitor::merge_registers(void)
5289 {
5290 assert(need_uarl);
5291 struct lifetime *lifetimes =
5292 rzalloc_array(mem_ctx, struct lifetime, this->next_temp);
5293
5294 if (get_temp_registers_required_lifetimes(mem_ctx, &this->instructions,
5295 this->next_temp, lifetimes)) {
5296 struct rename_reg_pair *renames =
5297 rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp);
5298 get_temp_registers_remapping(mem_ctx, this->next_temp, lifetimes, renames);
5299 rename_temp_registers(renames);
5300 ralloc_free(renames);
5301 }
5302
5303 ralloc_free(lifetimes);
5304 }
5305
5306 /* Reassign indices to temporary registers by reusing unused indices created
5307 * by optimization passes. */
5308 void
5309 glsl_to_tgsi_visitor::renumber_registers(void)
5310 {
5311 int i = 0;
5312 int new_index = 0;
5313 int *first_writes = ralloc_array(mem_ctx, int, this->next_temp);
5314 struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp);
5315
5316 for (i = 0; i < this->next_temp; i++) {
5317 first_writes[i] = -1;
5318 }
5319 get_first_temp_write(first_writes);
5320
5321 for (i = 0; i < this->next_temp; i++) {
5322 if (first_writes[i] < 0) continue;
5323 if (i != new_index) {
5324 renames[i].new_reg = new_index;
5325 renames[i].valid = true;
5326 }
5327 new_index++;
5328 }
5329
5330 rename_temp_registers(renames);
5331 this->next_temp = new_index;
5332 ralloc_free(renames);
5333 ralloc_free(first_writes);
5334 }
5335
5336 /* ------------------------- TGSI conversion stuff -------------------------- */
5337
/**
 * Intermediate state used during shader translation.
 *
 * Holds the ureg program being built plus the lookup tables that map
 * glsl_to_tgsi register files and indices to already-declared ureg registers.
 */
struct st_translate {
   struct ureg_program *ureg;   /**< program the instructions are emitted into */

   unsigned temps_size;         /**< number of entries allocated in temps */
   struct ureg_dst *temps;      /**< PROGRAM_TEMPORARY table, grown and declared on demand by dst_register() */

   struct ureg_dst *arrays;     /**< PROGRAM_ARRAY registers, indexed by array_id - 1 */
   unsigned num_temp_arrays;    /**< upper bound for a valid array_id */
   struct ureg_src *constants;  /**< indexed by uniform/constant register index */
   int num_constants;           /**< indices >= this translate to a zero immediate */
   struct ureg_src *immediates; /**< indexed by PROGRAM_IMMEDIATE register index */
   int num_immediates;
   struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];  /**< indexed by outputMapping[] result */
   struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];    /**< indexed by inputMapping[] result */
   struct ureg_dst address[3];  /**< 0: reladdr, 1: reladdr2, 2: resource reladdr (see translate_addr() callers) */
   struct ureg_src samplers[PIPE_MAX_SAMPLERS];       /**< indexed by PROGRAM_SAMPLER resource index */
   struct ureg_src buffers[PIPE_MAX_SHADER_BUFFERS];  /**< indexed by PROGRAM_BUFFER resource index */
   struct ureg_src images[PIPE_MAX_SHADER_IMAGES];    /**< indexed by PROGRAM_IMAGE resource index */
   struct ureg_src systemValues[SYSTEM_VALUE_MAX];    /**< indexed by PROGRAM_SYSTEM_VALUE register index */
   struct ureg_src hw_atomics[PIPE_MAX_HW_ATOMIC_BUFFERS];
   struct ureg_src shared_memory;  /**< used for PROGRAM_MEMORY resources */
   unsigned *array_sizes;          /**< size passed when declaring arrays[i] */
   struct inout_decl *input_decls;    /**< searched by array_id via find_inout_array() */
   unsigned num_input_decls;
   struct inout_decl *output_decls;   /**< searched by array_id via find_inout_array() */
   unsigned num_output_decls;

   const ubyte *inputMapping;   /**< Mesa input index -> slot in inputs[] */
   const ubyte *outputMapping;  /**< Mesa output index -> slot in outputs[] */

   unsigned procType;  /**< PIPE_SHADER_VERTEX/FRAGMENT */
   bool need_uarl;     /**< when set, translate_addr() always uses the address[] registers */
};
5374
5375 /** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */
5376 unsigned
5377 _mesa_sysval_to_semantic(unsigned sysval)
5378 {
5379 switch (sysval) {
5380 /* Vertex shader */
5381 case SYSTEM_VALUE_VERTEX_ID:
5382 return TGSI_SEMANTIC_VERTEXID;
5383 case SYSTEM_VALUE_INSTANCE_ID:
5384 return TGSI_SEMANTIC_INSTANCEID;
5385 case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
5386 return TGSI_SEMANTIC_VERTEXID_NOBASE;
5387 case SYSTEM_VALUE_BASE_VERTEX:
5388 return TGSI_SEMANTIC_BASEVERTEX;
5389 case SYSTEM_VALUE_BASE_INSTANCE:
5390 return TGSI_SEMANTIC_BASEINSTANCE;
5391 case SYSTEM_VALUE_DRAW_ID:
5392 return TGSI_SEMANTIC_DRAWID;
5393
5394 /* Geometry shader */
5395 case SYSTEM_VALUE_INVOCATION_ID:
5396 return TGSI_SEMANTIC_INVOCATIONID;
5397
5398 /* Fragment shader */
5399 case SYSTEM_VALUE_FRAG_COORD:
5400 return TGSI_SEMANTIC_POSITION;
5401 case SYSTEM_VALUE_FRONT_FACE:
5402 return TGSI_SEMANTIC_FACE;
5403 case SYSTEM_VALUE_SAMPLE_ID:
5404 return TGSI_SEMANTIC_SAMPLEID;
5405 case SYSTEM_VALUE_SAMPLE_POS:
5406 return TGSI_SEMANTIC_SAMPLEPOS;
5407 case SYSTEM_VALUE_SAMPLE_MASK_IN:
5408 return TGSI_SEMANTIC_SAMPLEMASK;
5409 case SYSTEM_VALUE_HELPER_INVOCATION:
5410 return TGSI_SEMANTIC_HELPER_INVOCATION;
5411
5412 /* Tessellation shader */
5413 case SYSTEM_VALUE_TESS_COORD:
5414 return TGSI_SEMANTIC_TESSCOORD;
5415 case SYSTEM_VALUE_VERTICES_IN:
5416 return TGSI_SEMANTIC_VERTICESIN;
5417 case SYSTEM_VALUE_PRIMITIVE_ID:
5418 return TGSI_SEMANTIC_PRIMID;
5419 case SYSTEM_VALUE_TESS_LEVEL_OUTER:
5420 return TGSI_SEMANTIC_TESSOUTER;
5421 case SYSTEM_VALUE_TESS_LEVEL_INNER:
5422 return TGSI_SEMANTIC_TESSINNER;
5423
5424 /* Compute shader */
5425 case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
5426 return TGSI_SEMANTIC_THREAD_ID;
5427 case SYSTEM_VALUE_WORK_GROUP_ID:
5428 return TGSI_SEMANTIC_BLOCK_ID;
5429 case SYSTEM_VALUE_NUM_WORK_GROUPS:
5430 return TGSI_SEMANTIC_GRID_SIZE;
5431 case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
5432 return TGSI_SEMANTIC_BLOCK_SIZE;
5433
5434 /* ARB_shader_ballot */
5435 case SYSTEM_VALUE_SUBGROUP_SIZE:
5436 return TGSI_SEMANTIC_SUBGROUP_SIZE;
5437 case SYSTEM_VALUE_SUBGROUP_INVOCATION:
5438 return TGSI_SEMANTIC_SUBGROUP_INVOCATION;
5439 case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
5440 return TGSI_SEMANTIC_SUBGROUP_EQ_MASK;
5441 case SYSTEM_VALUE_SUBGROUP_GE_MASK:
5442 return TGSI_SEMANTIC_SUBGROUP_GE_MASK;
5443 case SYSTEM_VALUE_SUBGROUP_GT_MASK:
5444 return TGSI_SEMANTIC_SUBGROUP_GT_MASK;
5445 case SYSTEM_VALUE_SUBGROUP_LE_MASK:
5446 return TGSI_SEMANTIC_SUBGROUP_LE_MASK;
5447 case SYSTEM_VALUE_SUBGROUP_LT_MASK:
5448 return TGSI_SEMANTIC_SUBGROUP_LT_MASK;
5449
5450 /* Unhandled */
5451 case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
5452 case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
5453 case SYSTEM_VALUE_VERTEX_CNT:
5454 default:
5455 assert(!"Unexpected SYSTEM_VALUE_ enum");
5456 return TGSI_SEMANTIC_COUNT;
5457 }
5458 }
5459
5460 /**
5461 * Map a glsl_to_tgsi constant/immediate to a TGSI immediate.
5462 */
5463 static struct ureg_src
5464 emit_immediate(struct st_translate *t,
5465 gl_constant_value values[4],
5466 int type, int size)
5467 {
5468 struct ureg_program *ureg = t->ureg;
5469
5470 switch(type)
5471 {
5472 case GL_FLOAT:
5473 return ureg_DECL_immediate(ureg, &values[0].f, size);
5474 case GL_DOUBLE:
5475 return ureg_DECL_immediate_f64(ureg, (double *)&values[0].f, size);
5476 case GL_INT64_ARB:
5477 return ureg_DECL_immediate_int64(ureg, (int64_t *)&values[0].f, size);
5478 case GL_UNSIGNED_INT64_ARB:
5479 return ureg_DECL_immediate_uint64(ureg, (uint64_t *)&values[0].f, size);
5480 case GL_INT:
5481 return ureg_DECL_immediate_int(ureg, &values[0].i, size);
5482 case GL_UNSIGNED_INT:
5483 case GL_BOOL:
5484 return ureg_DECL_immediate_uint(ureg, &values[0].u, size);
5485 default:
5486 assert(!"should not get here - type must be float, int, uint, or bool");
5487 return ureg_src_undef();
5488 }
5489 }
5490
5491 /**
5492 * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register.
5493 */
5494 static struct ureg_dst
5495 dst_register(struct st_translate *t, gl_register_file file, unsigned index,
5496 unsigned array_id)
5497 {
5498 unsigned array;
5499
5500 switch(file) {
5501 case PROGRAM_UNDEFINED:
5502 return ureg_dst_undef();
5503
5504 case PROGRAM_TEMPORARY:
5505 /* Allocate space for temporaries on demand. */
5506 if (index >= t->temps_size) {
5507 const int inc = align(index - t->temps_size + 1, 4096);
5508
5509 t->temps = (struct ureg_dst*)
5510 realloc(t->temps,
5511 (t->temps_size + inc) * sizeof(struct ureg_dst));
5512 if (!t->temps)
5513 return ureg_dst_undef();
5514
5515 memset(t->temps + t->temps_size, 0, inc * sizeof(struct ureg_dst));
5516 t->temps_size += inc;
5517 }
5518
5519 if (ureg_dst_is_undef(t->temps[index]))
5520 t->temps[index] = ureg_DECL_local_temporary(t->ureg);
5521
5522 return t->temps[index];
5523
5524 case PROGRAM_ARRAY:
5525 assert(array_id && array_id <= t->num_temp_arrays);
5526 array = array_id - 1;
5527
5528 if (ureg_dst_is_undef(t->arrays[array]))
5529 t->arrays[array] = ureg_DECL_array_temporary(
5530 t->ureg, t->array_sizes[array], TRUE);
5531
5532 return ureg_dst_array_offset(t->arrays[array], index);
5533
5534 case PROGRAM_OUTPUT:
5535 if (!array_id) {
5536 if (t->procType == PIPE_SHADER_FRAGMENT)
5537 assert(index < 2 * FRAG_RESULT_MAX);
5538 else if (t->procType == PIPE_SHADER_TESS_CTRL ||
5539 t->procType == PIPE_SHADER_TESS_EVAL)
5540 assert(index < VARYING_SLOT_TESS_MAX);
5541 else
5542 assert(index < VARYING_SLOT_MAX);
5543
5544 assert(t->outputMapping[index] < ARRAY_SIZE(t->outputs));
5545 assert(t->outputs[t->outputMapping[index]].File != TGSI_FILE_NULL);
5546 return t->outputs[t->outputMapping[index]];
5547 }
5548 else {
5549 struct inout_decl *decl = find_inout_array(t->output_decls, t->num_output_decls, array_id);
5550 unsigned mesa_index = decl->mesa_index;
5551 int slot = t->outputMapping[mesa_index];
5552
5553 assert(slot != -1 && t->outputs[slot].File == TGSI_FILE_OUTPUT);
5554
5555 struct ureg_dst dst = t->outputs[slot];
5556 dst.ArrayID = array_id;
5557 return ureg_dst_array_offset(dst, index - mesa_index);
5558 }
5559
5560 case PROGRAM_ADDRESS:
5561 return t->address[index];
5562
5563 default:
5564 assert(!"unknown dst register file");
5565 return ureg_dst_undef();
5566 }
5567 }
5568
5569 static struct ureg_src
5570 translate_src(struct st_translate *t, const st_src_reg *src_reg);
5571
5572 static struct ureg_src
5573 translate_addr(struct st_translate *t, const st_src_reg *reladdr,
5574 unsigned addr_index)
5575 {
5576 if (t->need_uarl || !reladdr->is_legal_tgsi_address_operand())
5577 return ureg_src(t->address[addr_index]);
5578
5579 return translate_src(t, reladdr);
5580 }
5581
5582 /**
5583 * Create a TGSI ureg_dst register from an st_dst_reg.
5584 */
5585 static struct ureg_dst
5586 translate_dst(struct st_translate *t,
5587 const st_dst_reg *dst_reg,
5588 bool saturate)
5589 {
5590 struct ureg_dst dst = dst_register(t, dst_reg->file, dst_reg->index,
5591 dst_reg->array_id);
5592
5593 if (dst.File == TGSI_FILE_NULL)
5594 return dst;
5595
5596 dst = ureg_writemask(dst, dst_reg->writemask);
5597
5598 if (saturate)
5599 dst = ureg_saturate(dst);
5600
5601 if (dst_reg->reladdr != NULL) {
5602 assert(dst_reg->file != PROGRAM_TEMPORARY);
5603 dst = ureg_dst_indirect(dst, translate_addr(t, dst_reg->reladdr, 0));
5604 }
5605
5606 if (dst_reg->has_index2) {
5607 if (dst_reg->reladdr2)
5608 dst = ureg_dst_dimension_indirect(dst,
5609 translate_addr(t, dst_reg->reladdr2, 1),
5610 dst_reg->index2D);
5611 else
5612 dst = ureg_dst_dimension(dst, dst_reg->index2D);
5613 }
5614
5615 return dst;
5616 }
5617
5618 /**
5619 * Create a TGSI ureg_src register from an st_src_reg.
5620 */
5621 static struct ureg_src
5622 translate_src(struct st_translate *t, const st_src_reg *src_reg)
5623 {
5624 struct ureg_src src;
5625 int index = src_reg->index;
5626 int double_reg2 = src_reg->double_reg2 ? 1 : 0;
5627
5628 switch(src_reg->file) {
5629 case PROGRAM_UNDEFINED:
5630 src = ureg_imm4f(t->ureg, 0, 0, 0, 0);
5631 break;
5632
5633 case PROGRAM_TEMPORARY:
5634 case PROGRAM_ARRAY:
5635 src = ureg_src(dst_register(t, src_reg->file, src_reg->index, src_reg->array_id));
5636 break;
5637
5638 case PROGRAM_OUTPUT: {
5639 struct ureg_dst dst = dst_register(t, src_reg->file, src_reg->index, src_reg->array_id);
5640 assert(dst.WriteMask != 0);
5641 unsigned shift = ffs(dst.WriteMask) - 1;
5642 src = ureg_swizzle(ureg_src(dst),
5643 shift,
5644 MIN2(shift + 1, 3),
5645 MIN2(shift + 2, 3),
5646 MIN2(shift + 3, 3));
5647 break;
5648 }
5649
5650 case PROGRAM_UNIFORM:
5651 assert(src_reg->index >= 0);
5652 src = src_reg->index < t->num_constants ?
5653 t->constants[src_reg->index] : ureg_imm4f(t->ureg, 0, 0, 0, 0);
5654 break;
5655 case PROGRAM_STATE_VAR:
5656 case PROGRAM_CONSTANT: /* ie, immediate */
5657 if (src_reg->has_index2)
5658 src = ureg_src_register(TGSI_FILE_CONSTANT, src_reg->index);
5659 else
5660 src = src_reg->index >= 0 && src_reg->index < t->num_constants ?
5661 t->constants[src_reg->index] : ureg_imm4f(t->ureg, 0, 0, 0, 0);
5662 break;
5663
5664 case PROGRAM_IMMEDIATE:
5665 assert(src_reg->index >= 0 && src_reg->index < t->num_immediates);
5666 src = t->immediates[src_reg->index];
5667 break;
5668
5669 case PROGRAM_INPUT:
5670 /* GLSL inputs are 64-bit containers, so we have to
5671 * map back to the original index and add the offset after
5672 * mapping. */
5673 index -= double_reg2;
5674 if (!src_reg->array_id) {
5675 assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs));
5676 assert(t->inputs[t->inputMapping[index]].File != TGSI_FILE_NULL);
5677 src = t->inputs[t->inputMapping[index] + double_reg2];
5678 }
5679 else {
5680 struct inout_decl *decl = find_inout_array(t->input_decls, t->num_input_decls,
5681 src_reg->array_id);
5682 unsigned mesa_index = decl->mesa_index;
5683 int slot = t->inputMapping[mesa_index];
5684
5685 assert(slot != -1 && t->inputs[slot].File == TGSI_FILE_INPUT);
5686
5687 src = t->inputs[slot];
5688 src.ArrayID = src_reg->array_id;
5689 src = ureg_src_array_offset(src, index + double_reg2 - mesa_index);
5690 }
5691 break;
5692
5693 case PROGRAM_ADDRESS:
5694 src = ureg_src(t->address[src_reg->index]);
5695 break;
5696
5697 case PROGRAM_SYSTEM_VALUE:
5698 assert(src_reg->index < (int) ARRAY_SIZE(t->systemValues));
5699 src = t->systemValues[src_reg->index];
5700 break;
5701
5702 case PROGRAM_HW_ATOMIC:
5703 src = ureg_src_array_register(TGSI_FILE_HW_ATOMIC, src_reg->index,
5704 src_reg->array_id);
5705 break;
5706
5707 default:
5708 assert(!"unknown src register file");
5709 return ureg_src_undef();
5710 }
5711
5712 if (src_reg->has_index2) {
5713 /* 2D indexes occur with geometry shader inputs (attrib, vertex)
5714 * and UBO constant buffers (buffer, position).
5715 */
5716 if (src_reg->reladdr2)
5717 src = ureg_src_dimension_indirect(src,
5718 translate_addr(t, src_reg->reladdr2, 1),
5719 src_reg->index2D);
5720 else
5721 src = ureg_src_dimension(src, src_reg->index2D);
5722 }
5723
5724 src = ureg_swizzle(src,
5725 GET_SWZ(src_reg->swizzle, 0) & 0x3,
5726 GET_SWZ(src_reg->swizzle, 1) & 0x3,
5727 GET_SWZ(src_reg->swizzle, 2) & 0x3,
5728 GET_SWZ(src_reg->swizzle, 3) & 0x3);
5729
5730 if (src_reg->abs)
5731 src = ureg_abs(src);
5732
5733 if ((src_reg->negate & 0xf) == NEGATE_XYZW)
5734 src = ureg_negate(src);
5735
5736 if (src_reg->reladdr != NULL) {
5737 assert(src_reg->file != PROGRAM_TEMPORARY);
5738 src = ureg_src_indirect(src, translate_addr(t, src_reg->reladdr, 0));
5739 }
5740
5741 return src;
5742 }
5743
5744 static struct tgsi_texture_offset
5745 translate_tex_offset(struct st_translate *t,
5746 const st_src_reg *in_offset)
5747 {
5748 struct tgsi_texture_offset offset;
5749 struct ureg_src src = translate_src(t, in_offset);
5750
5751 offset.File = src.File;
5752 offset.Index = src.Index;
5753 offset.SwizzleX = src.SwizzleX;
5754 offset.SwizzleY = src.SwizzleY;
5755 offset.SwizzleZ = src.SwizzleZ;
5756 offset.Padding = 0;
5757
5758 assert(!src.Indirect);
5759 assert(!src.DimIndirect);
5760 assert(!src.Dimension);
5761 assert(!src.Absolute); /* those shouldn't be used with integers anyway */
5762 assert(!src.Negate);
5763
5764 return offset;
5765 }
5766
5767 static void
5768 compile_tgsi_instruction(struct st_translate *t,
5769 const glsl_to_tgsi_instruction *inst)
5770 {
5771 struct ureg_program *ureg = t->ureg;
5772 int i;
5773 struct ureg_dst dst[2];
5774 struct ureg_src src[4];
5775 struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET];
5776
5777 int num_dst;
5778 int num_src;
5779 unsigned tex_target = 0;
5780
5781 num_dst = num_inst_dst_regs(inst);
5782 num_src = num_inst_src_regs(inst);
5783
5784 for (i = 0; i < num_dst; i++)
5785 dst[i] = translate_dst(t,
5786 &inst->dst[i],
5787 inst->saturate);
5788
5789 for (i = 0; i < num_src; i++)
5790 src[i] = translate_src(t, &inst->src[i]);
5791
5792 switch(inst->op) {
5793 case TGSI_OPCODE_BGNLOOP:
5794 case TGSI_OPCODE_ELSE:
5795 case TGSI_OPCODE_ENDLOOP:
5796 case TGSI_OPCODE_IF:
5797 case TGSI_OPCODE_UIF:
5798 assert(num_dst == 0);
5799 ureg_insn(ureg, inst->op, NULL, 0, src, num_src, inst->precise);
5800 return;
5801
5802 case TGSI_OPCODE_TEX:
5803 case TGSI_OPCODE_TEX_LZ:
5804 case TGSI_OPCODE_TXB:
5805 case TGSI_OPCODE_TXD:
5806 case TGSI_OPCODE_TXL:
5807 case TGSI_OPCODE_TXP:
5808 case TGSI_OPCODE_TXQ:
5809 case TGSI_OPCODE_TXQS:
5810 case TGSI_OPCODE_TXF:
5811 case TGSI_OPCODE_TXF_LZ:
5812 case TGSI_OPCODE_TEX2:
5813 case TGSI_OPCODE_TXB2:
5814 case TGSI_OPCODE_TXL2:
5815 case TGSI_OPCODE_TG4:
5816 case TGSI_OPCODE_LODQ:
5817 if (inst->resource.file == PROGRAM_SAMPLER) {
5818 src[num_src] = t->samplers[inst->resource.index];
5819 } else {
5820 /* Bindless samplers. */
5821 src[num_src] = translate_src(t, &inst->resource);
5822 }
5823 assert(src[num_src].File != TGSI_FILE_NULL);
5824 if (inst->resource.reladdr)
5825 src[num_src] =
5826 ureg_src_indirect(src[num_src],
5827 translate_addr(t, inst->resource.reladdr, 2));
5828 num_src++;
5829 for (i = 0; i < (int)inst->tex_offset_num_offset; i++) {
5830 texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]);
5831 }
5832 tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);
5833
5834 ureg_tex_insn(ureg,
5835 inst->op,
5836 dst, num_dst,
5837 tex_target,
5838 st_translate_texture_type(inst->tex_type),
5839 texoffsets, inst->tex_offset_num_offset,
5840 src, num_src);
5841 return;
5842
5843 case TGSI_OPCODE_RESQ:
5844 case TGSI_OPCODE_LOAD:
5845 case TGSI_OPCODE_ATOMUADD:
5846 case TGSI_OPCODE_ATOMXCHG:
5847 case TGSI_OPCODE_ATOMCAS:
5848 case TGSI_OPCODE_ATOMAND:
5849 case TGSI_OPCODE_ATOMOR:
5850 case TGSI_OPCODE_ATOMXOR:
5851 case TGSI_OPCODE_ATOMUMIN:
5852 case TGSI_OPCODE_ATOMUMAX:
5853 case TGSI_OPCODE_ATOMIMIN:
5854 case TGSI_OPCODE_ATOMIMAX:
5855 for (i = num_src - 1; i >= 0; i--)
5856 src[i + 1] = src[i];
5857 num_src++;
5858 if (inst->resource.file == PROGRAM_MEMORY) {
5859 src[0] = t->shared_memory;
5860 } else if (inst->resource.file == PROGRAM_BUFFER) {
5861 src[0] = t->buffers[inst->resource.index];
5862 } else if (inst->resource.file == PROGRAM_HW_ATOMIC) {
5863 src[0] = translate_src(t, &inst->resource);
5864 } else if (inst->resource.file == PROGRAM_CONSTANT) {
5865 assert(inst->resource.has_index2);
5866 src[0] = ureg_src_register(TGSI_FILE_CONSTBUF, inst->resource.index);
5867 } else {
5868 assert(inst->resource.file != PROGRAM_UNDEFINED);
5869 if (inst->resource.file == PROGRAM_IMAGE) {
5870 src[0] = t->images[inst->resource.index];
5871 } else {
5872 /* Bindless images. */
5873 src[0] = translate_src(t, &inst->resource);
5874 }
5875 tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);
5876 }
5877 if (inst->resource.reladdr)
5878 src[0] = ureg_src_indirect(src[0],
5879 translate_addr(t, inst->resource.reladdr, 2));
5880 assert(src[0].File != TGSI_FILE_NULL);
5881 ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src,
5882 inst->buffer_access,
5883 tex_target, inst->image_format);
5884 break;
5885
5886 case TGSI_OPCODE_STORE:
5887 if (inst->resource.file == PROGRAM_MEMORY) {
5888 dst[0] = ureg_dst(t->shared_memory);
5889 } else if (inst->resource.file == PROGRAM_BUFFER) {
5890 dst[0] = ureg_dst(t->buffers[inst->resource.index]);
5891 } else {
5892 if (inst->resource.file == PROGRAM_IMAGE) {
5893 dst[0] = ureg_dst(t->images[inst->resource.index]);
5894 } else {
5895 /* Bindless images. */
5896 dst[0] = ureg_dst(translate_src(t, &inst->resource));
5897 }
5898 tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);
5899 }
5900 dst[0] = ureg_writemask(dst[0], inst->dst[0].writemask);
5901 if (inst->resource.reladdr)
5902 dst[0] = ureg_dst_indirect(dst[0],
5903 translate_addr(t, inst->resource.reladdr, 2));
5904 assert(dst[0].File != TGSI_FILE_NULL);
5905 ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src,
5906 inst->buffer_access,
5907 tex_target, inst->image_format);
5908 break;
5909
5910 default:
5911 ureg_insn(ureg,
5912 inst->op,
5913 dst, num_dst,
5914 src, num_src, inst->precise);
5915 break;
5916 }
5917 }
5918
5919 /**
5920 * Emit the TGSI instructions for inverting and adjusting WPOS.
5921 * This code is unavoidable because it also depends on whether
5922 * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
5923 */
5924 static void
5925 emit_wpos_adjustment(struct gl_context *ctx,
5926 struct st_translate *t,
5927 int wpos_transform_const,
5928 boolean invert,
5929 GLfloat adjX, GLfloat adjY[2])
5930 {
5931 struct ureg_program *ureg = t->ureg;
5932
5933 assert(wpos_transform_const >= 0);
5934
5935 /* Fragment program uses fragment position input.
5936 * Need to replace instances of INPUT[WPOS] with temp T
5937 * where T = INPUT[WPOS] is inverted by Y.
5938 */
5939 struct ureg_src wpostrans = ureg_DECL_constant(ureg, wpos_transform_const);
5940 struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg );
5941 struct ureg_src *wpos =
5942 ctx->Const.GLSLFragCoordIsSysVal ?
5943 &t->systemValues[SYSTEM_VALUE_FRAG_COORD] :
5944 &t->inputs[t->inputMapping[VARYING_SLOT_POS]];
5945 struct ureg_src wpos_input = *wpos;
5946
5947 /* First, apply the coordinate shift: */
5948 if (adjX || adjY[0] || adjY[1]) {
5949 if (adjY[0] != adjY[1]) {
5950 /* Adjust the y coordinate by adjY[1] or adjY[0] respectively
5951 * depending on whether inversion is actually going to be applied
5952 * or not, which is determined by testing against the inversion
5953 * state variable used below, which will be either +1 or -1.
5954 */
5955 struct ureg_dst adj_temp = ureg_DECL_local_temporary(ureg);
5956
5957 ureg_CMP(ureg, adj_temp,
5958 ureg_scalar(wpostrans, invert ? 2 : 0),
5959 ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f),
5960 ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f));
5961 ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp));
5962 } else {
5963 ureg_ADD(ureg, wpos_temp, wpos_input,
5964 ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f));
5965 }
5966 wpos_input = ureg_src(wpos_temp);
5967 } else {
5968 /* MOV wpos_temp, input[wpos]
5969 */
5970 ureg_MOV( ureg, wpos_temp, wpos_input );
5971 }
5972
5973 /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be
5974 * inversion/identity, or the other way around if we're drawing to an FBO.
5975 */
5976 if (invert) {
5977 /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
5978 */
5979 ureg_MAD( ureg,
5980 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
5981 wpos_input,
5982 ureg_scalar(wpostrans, 0),
5983 ureg_scalar(wpostrans, 1));
5984 } else {
5985 /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
5986 */
5987 ureg_MAD( ureg,
5988 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
5989 wpos_input,
5990 ureg_scalar(wpostrans, 2),
5991 ureg_scalar(wpostrans, 3));
5992 }
5993
5994 /* Use wpos_temp as position input from here on:
5995 */
5996 *wpos = ureg_src(wpos_temp);
5997 }
5998
5999
6000 /**
6001 * Emit fragment position/ooordinate code.
6002 */
6003 static void
6004 emit_wpos(struct st_context *st,
6005 struct st_translate *t,
6006 const struct gl_program *program,
6007 struct ureg_program *ureg,
6008 int wpos_transform_const)
6009 {
6010 struct pipe_screen *pscreen = st->pipe->screen;
6011 GLfloat adjX = 0.0f;
6012 GLfloat adjY[2] = { 0.0f, 0.0f };
6013 boolean invert = FALSE;
6014
6015 /* Query the pixel center conventions supported by the pipe driver and set
6016 * adjX, adjY to help out if it cannot handle the requested one internally.
6017 *
6018 * The bias of the y-coordinate depends on whether y-inversion takes place
6019 * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are
6020 * drawing to an FBO (causes additional inversion), and whether the pipe
6021 * driver origin and the requested origin differ (the latter condition is
6022 * stored in the 'invert' variable).
6023 *
6024 * For height = 100 (i = integer, h = half-integer, l = lower, u = upper):
6025 *
6026 * center shift only:
6027 * i -> h: +0.5
6028 * h -> i: -0.5
6029 *
6030 * inversion only:
6031 * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99
6032 * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5
6033 * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0
6034 * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5
6035 *
6036 * inversion and center shift:
6037 * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5
6038 * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99
6039 * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5
6040 * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0
6041 */
6042 if (program->OriginUpperLeft) {
6043 /* Fragment shader wants origin in upper-left */
6044 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
6045 /* the driver supports upper-left origin */
6046 }
6047 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
6048 /* the driver supports lower-left origin, need to invert Y */
6049 ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN,
6050 TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
6051 invert = TRUE;
6052 }
6053 else
6054 assert(0);
6055 }
6056 else {
6057 /* Fragment shader wants origin in lower-left */
6058 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
6059 /* the driver supports lower-left origin */
6060 ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN,
6061 TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
6062 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
6063 /* the driver supports upper-left origin, need to invert Y */
6064 invert = TRUE;
6065 else
6066 assert(0);
6067 }
6068
6069 if (program->PixelCenterInteger) {
6070 /* Fragment shader wants pixel center integer */
6071 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
6072 /* the driver supports pixel center integer */
6073 adjY[1] = 1.0f;
6074 ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
6075 TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
6076 }
6077 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
6078 /* the driver supports pixel center half integer, need to bias X,Y */
6079 adjX = -0.5f;
6080 adjY[0] = -0.5f;
6081 adjY[1] = 0.5f;
6082 }
6083 else
6084 assert(0);
6085 }
6086 else {
6087 /* Fragment shader wants pixel center half integer */
6088 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
6089 /* the driver supports pixel center half integer */
6090 }
6091 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
6092 /* the driver supports pixel center integer, need to bias X,Y */
6093 adjX = adjY[0] = adjY[1] = 0.5f;
6094 ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
6095 TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
6096 }
6097 else
6098 assert(0);
6099 }
6100
6101 /* we invert after adjustment so that we avoid the MOV to temporary,
6102 * and reuse the adjustment ADD instead */
6103 emit_wpos_adjustment(st->ctx, t, wpos_transform_const, invert, adjX, adjY);
6104 }
6105
6106 /**
6107 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back.
6108 * TGSI uses +1 for front, -1 for back.
6109 * This function converts the TGSI value to the GL value. Simply clamping/
6110 * saturating the value to [0,1] does the job.
6111 */
6112 static void
6113 emit_face_var(struct gl_context *ctx, struct st_translate *t)
6114 {
6115 struct ureg_program *ureg = t->ureg;
6116 struct ureg_dst face_temp = ureg_DECL_temporary(ureg);
6117 struct ureg_src face_input = t->inputs[t->inputMapping[VARYING_SLOT_FACE]];
6118
6119 if (ctx->Const.NativeIntegers) {
6120 ureg_FSGE(ureg, face_temp, face_input, ureg_imm1f(ureg, 0));
6121 }
6122 else {
6123 /* MOV_SAT face_temp, input[face] */
6124 ureg_MOV(ureg, ureg_saturate(face_temp), face_input);
6125 }
6126
6127 /* Use face_temp as face input from here on: */
6128 t->inputs[t->inputMapping[VARYING_SLOT_FACE]] = ureg_src(face_temp);
6129 }
6130
6131 static void
6132 emit_compute_block_size(const struct gl_program *prog,
6133 struct ureg_program *ureg) {
6134 ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH,
6135 prog->info.cs.local_size[0]);
6136 ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT,
6137 prog->info.cs.local_size[1]);
6138 ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH,
6139 prog->info.cs.local_size[2]);
6140 }
6141
6142 struct sort_inout_decls {
6143 bool operator()(const struct inout_decl &a, const struct inout_decl &b) const {
6144 return mapping[a.mesa_index] < mapping[b.mesa_index];
6145 }
6146
6147 const ubyte *mapping;
6148 };
6149
6150 /* Sort the given array of decls by the corresponding slot (TGSI file index).
6151 *
6152 * This is for the benefit of older drivers which are broken when the
6153 * declarations aren't sorted in this way.
6154 */
6155 static void
6156 sort_inout_decls_by_slot(struct inout_decl *decls,
6157 unsigned count,
6158 const ubyte mapping[])
6159 {
6160 sort_inout_decls sorter;
6161 sorter.mapping = mapping;
6162 std::sort(decls, decls + count, sorter);
6163 }
6164
6165 static unsigned
6166 st_translate_interp(enum glsl_interp_mode glsl_qual, GLuint varying)
6167 {
6168 switch (glsl_qual) {
6169 case INTERP_MODE_NONE:
6170 if (varying == VARYING_SLOT_COL0 || varying == VARYING_SLOT_COL1)
6171 return TGSI_INTERPOLATE_COLOR;
6172 return TGSI_INTERPOLATE_PERSPECTIVE;
6173 case INTERP_MODE_SMOOTH:
6174 return TGSI_INTERPOLATE_PERSPECTIVE;
6175 case INTERP_MODE_FLAT:
6176 return TGSI_INTERPOLATE_CONSTANT;
6177 case INTERP_MODE_NOPERSPECTIVE:
6178 return TGSI_INTERPOLATE_LINEAR;
6179 default:
6180 assert(0 && "unexpected interp mode in st_translate_interp()");
6181 return TGSI_INTERPOLATE_PERSPECTIVE;
6182 }
6183 }
6184
6185 /**
6186 * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
6187 * \param program the program to translate
6188 * \param numInputs number of input registers used
6189 * \param inputMapping maps Mesa fragment program inputs to TGSI generic
6190 * input indexes
6191 * \param inputSemanticName the TGSI_SEMANTIC flag for each input
6192 * \param inputSemanticIndex the semantic index (ex: which texcoord) for
6193 * each input
6194 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
6195 * \param numOutputs number of output registers used
6196 * \param outputMapping maps Mesa fragment program outputs to TGSI
6197 * generic outputs
6198 * \param outputSemanticName the TGSI_SEMANTIC flag for each output
6199 * \param outputSemanticIndex the semantic index (ex: which texcoord) for
6200 * each output
6201 *
6202 * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
6203 */
6204 extern "C" enum pipe_error
6205 st_translate_program(
6206 struct gl_context *ctx,
6207 uint procType,
6208 struct ureg_program *ureg,
6209 glsl_to_tgsi_visitor *program,
6210 const struct gl_program *proginfo,
6211 GLuint numInputs,
6212 const ubyte inputMapping[],
6213 const ubyte inputSlotToAttr[],
6214 const ubyte inputSemanticName[],
6215 const ubyte inputSemanticIndex[],
6216 const ubyte interpMode[],
6217 GLuint numOutputs,
6218 const ubyte outputMapping[],
6219 const ubyte outputSemanticName[],
6220 const ubyte outputSemanticIndex[])
6221 {
6222 struct pipe_screen *screen = st_context(ctx)->pipe->screen;
6223 struct st_translate *t;
6224 unsigned i;
6225 struct gl_program_constants *frag_const =
6226 &ctx->Const.Program[MESA_SHADER_FRAGMENT];
6227 enum pipe_error ret = PIPE_OK;
6228
6229 assert(numInputs <= ARRAY_SIZE(t->inputs));
6230 assert(numOutputs <= ARRAY_SIZE(t->outputs));
6231
6232 ASSERT_BITFIELD_SIZE(st_src_reg, type, GLSL_TYPE_ERROR);
6233 ASSERT_BITFIELD_SIZE(st_dst_reg, type, GLSL_TYPE_ERROR);
6234 ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, image_format, PIPE_FORMAT_COUNT);
6235 ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, tex_target,
6236 (gl_texture_index) (NUM_TEXTURE_TARGETS - 1));
6237 ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, image_format,
6238 (enum pipe_format) (PIPE_FORMAT_COUNT - 1));
6239 ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, op, TGSI_OPCODE_LAST - 1);
6240
6241 t = CALLOC_STRUCT(st_translate);
6242 if (!t) {
6243 ret = PIPE_ERROR_OUT_OF_MEMORY;
6244 goto out;
6245 }
6246
6247 t->procType = procType;
6248 t->need_uarl = !screen->get_param(screen, PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS);
6249 t->inputMapping = inputMapping;
6250 t->outputMapping = outputMapping;
6251 t->ureg = ureg;
6252 t->num_temp_arrays = program->next_array;
6253 if (t->num_temp_arrays)
6254 t->arrays = (struct ureg_dst*)
6255 calloc(t->num_temp_arrays, sizeof(t->arrays[0]));
6256
6257 /*
6258 * Declare input attributes.
6259 */
6260 switch (procType) {
6261 case PIPE_SHADER_FRAGMENT:
6262 case PIPE_SHADER_GEOMETRY:
6263 case PIPE_SHADER_TESS_EVAL:
6264 case PIPE_SHADER_TESS_CTRL:
6265 sort_inout_decls_by_slot(program->inputs, program->num_inputs, inputMapping);
6266
6267 for (i = 0; i < program->num_inputs; ++i) {
6268 struct inout_decl *decl = &program->inputs[i];
6269 unsigned slot = inputMapping[decl->mesa_index];
6270 struct ureg_src src;
6271 ubyte tgsi_usage_mask = decl->usage_mask;
6272
6273 if (glsl_base_type_is_64bit(decl->base_type)) {
6274 if (tgsi_usage_mask == 1)
6275 tgsi_usage_mask = TGSI_WRITEMASK_XY;
6276 else if (tgsi_usage_mask == 2)
6277 tgsi_usage_mask = TGSI_WRITEMASK_ZW;
6278 else
6279 tgsi_usage_mask = TGSI_WRITEMASK_XYZW;
6280 }
6281
6282 unsigned interp_mode = 0;
6283 unsigned interp_location = 0;
6284 if (procType == PIPE_SHADER_FRAGMENT) {
6285 assert(interpMode);
6286 interp_mode = interpMode[slot] != TGSI_INTERPOLATE_COUNT ?
6287 interpMode[slot] :
6288 st_translate_interp(decl->interp, inputSlotToAttr[slot]);
6289
6290 interp_location = decl->interp_loc;
6291 }
6292
6293 src = ureg_DECL_fs_input_cyl_centroid_layout(ureg,
6294 inputSemanticName[slot], inputSemanticIndex[slot],
6295 interp_mode, 0, interp_location, slot, tgsi_usage_mask,
6296 decl->array_id, decl->size);
6297
6298 for (unsigned j = 0; j < decl->size; ++j) {
6299 if (t->inputs[slot + j].File != TGSI_FILE_INPUT) {
6300 /* The ArrayID is set up in dst_register */
6301 t->inputs[slot + j] = src;
6302 t->inputs[slot + j].ArrayID = 0;
6303 t->inputs[slot + j].Index += j;
6304 }
6305 }
6306 }
6307 break;
6308 case PIPE_SHADER_VERTEX:
6309 for (i = 0; i < numInputs; i++) {
6310 t->inputs[i] = ureg_DECL_vs_input(ureg, i);
6311 }
6312 break;
6313 case PIPE_SHADER_COMPUTE:
6314 break;
6315 default:
6316 assert(0);
6317 }
6318
6319 /*
6320 * Declare output attributes.
6321 */
6322 switch (procType) {
6323 case PIPE_SHADER_FRAGMENT:
6324 case PIPE_SHADER_COMPUTE:
6325 break;
6326 case PIPE_SHADER_GEOMETRY:
6327 case PIPE_SHADER_TESS_EVAL:
6328 case PIPE_SHADER_TESS_CTRL:
6329 case PIPE_SHADER_VERTEX:
6330 sort_inout_decls_by_slot(program->outputs, program->num_outputs, outputMapping);
6331
6332 for (i = 0; i < program->num_outputs; ++i) {
6333 struct inout_decl *decl = &program->outputs[i];
6334 unsigned slot = outputMapping[decl->mesa_index];
6335 struct ureg_dst dst;
6336 ubyte tgsi_usage_mask = decl->usage_mask;
6337
6338 if (glsl_base_type_is_64bit(decl->base_type)) {
6339 if (tgsi_usage_mask == 1)
6340 tgsi_usage_mask = TGSI_WRITEMASK_XY;
6341 else if (tgsi_usage_mask == 2)
6342 tgsi_usage_mask = TGSI_WRITEMASK_ZW;
6343 else
6344 tgsi_usage_mask = TGSI_WRITEMASK_XYZW;
6345 }
6346
6347 dst = ureg_DECL_output_layout(ureg,
6348 outputSemanticName[slot], outputSemanticIndex[slot],
6349 decl->gs_out_streams,
6350 slot, tgsi_usage_mask, decl->array_id, decl->size);
6351
6352 for (unsigned j = 0; j < decl->size; ++j) {
6353 if (t->outputs[slot + j].File != TGSI_FILE_OUTPUT) {
6354 /* The ArrayID is set up in dst_register */
6355 t->outputs[slot + j] = dst;
6356 t->outputs[slot + j].ArrayID = 0;
6357 t->outputs[slot + j].Index += j;
6358 }
6359 }
6360 }
6361 break;
6362 default:
6363 assert(0);
6364 }
6365
6366 if (procType == PIPE_SHADER_FRAGMENT) {
6367 if (program->shader->Program->info.fs.early_fragment_tests ||
6368 program->shader->Program->info.fs.post_depth_coverage) {
6369 ureg_property(ureg, TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL, 1);
6370
6371 if (program->shader->Program->info.fs.post_depth_coverage)
6372 ureg_property(ureg, TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE, 1);
6373 }
6374
6375 if (proginfo->info.inputs_read & VARYING_BIT_POS) {
6376 /* Must do this after setting up t->inputs. */
6377 emit_wpos(st_context(ctx), t, proginfo, ureg,
6378 program->wpos_transform_const);
6379 }
6380
6381 if (proginfo->info.inputs_read & VARYING_BIT_FACE)
6382 emit_face_var(ctx, t);
6383
6384 for (i = 0; i < numOutputs; i++) {
6385 switch (outputSemanticName[i]) {
6386 case TGSI_SEMANTIC_POSITION:
6387 t->outputs[i] = ureg_DECL_output(ureg,
6388 TGSI_SEMANTIC_POSITION, /* Z/Depth */
6389 outputSemanticIndex[i]);
6390 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z);
6391 break;
6392 case TGSI_SEMANTIC_STENCIL:
6393 t->outputs[i] = ureg_DECL_output(ureg,
6394 TGSI_SEMANTIC_STENCIL, /* Stencil */
6395 outputSemanticIndex[i]);
6396 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y);
6397 break;
6398 case TGSI_SEMANTIC_COLOR:
6399 t->outputs[i] = ureg_DECL_output(ureg,
6400 TGSI_SEMANTIC_COLOR,
6401 outputSemanticIndex[i]);
6402 break;
6403 case TGSI_SEMANTIC_SAMPLEMASK:
6404 t->outputs[i] = ureg_DECL_output(ureg,
6405 TGSI_SEMANTIC_SAMPLEMASK,
6406 outputSemanticIndex[i]);
6407 /* TODO: If we ever support more than 32 samples, this will have
6408 * to become an array.
6409 */
6410 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X);
6411 break;
6412 default:
6413 assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR");
6414 ret = PIPE_ERROR_BAD_INPUT;
6415 goto out;
6416 }
6417 }
6418 }
6419 else if (procType == PIPE_SHADER_VERTEX) {
6420 for (i = 0; i < numOutputs; i++) {
6421 if (outputSemanticName[i] == TGSI_SEMANTIC_FOG) {
6422 /* force register to contain a fog coordinate in the form (F, 0, 0, 1). */
6423 ureg_MOV(ureg,
6424 ureg_writemask(t->outputs[i], TGSI_WRITEMASK_YZW),
6425 ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
6426 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X);
6427 }
6428 }
6429 }
6430
6431 if (procType == PIPE_SHADER_COMPUTE) {
6432 emit_compute_block_size(proginfo, ureg);
6433 }
6434
6435 /* Declare address register.
6436 */
6437 if (program->num_address_regs > 0) {
6438 assert(program->num_address_regs <= 3);
6439 for (int i = 0; i < program->num_address_regs; i++)
6440 t->address[i] = ureg_DECL_address(ureg);
6441 }
6442
6443 /* Declare misc input registers
6444 */
6445 {
6446 GLbitfield sysInputs = proginfo->info.system_values_read;
6447
6448 for (i = 0; sysInputs; i++) {
6449 if (sysInputs & (1 << i)) {
6450 unsigned semName = _mesa_sysval_to_semantic(i);
6451
6452 t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0);
6453
6454 if (semName == TGSI_SEMANTIC_INSTANCEID ||
6455 semName == TGSI_SEMANTIC_VERTEXID) {
6456 /* From Gallium perspective, these system values are always
6457 * integer, and require native integer support. However, if
6458 * native integer is supported on the vertex stage but not the
6459 * pixel stage (e.g, i915g + draw), Mesa will generate IR that
6460 * assumes these system values are floats. To resolve the
6461 * inconsistency, we insert a U2F.
6462 */
6463 struct st_context *st = st_context(ctx);
6464 struct pipe_screen *pscreen = st->pipe->screen;
6465 assert(procType == PIPE_SHADER_VERTEX);
6466 assert(pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_INTEGERS));
6467 (void) pscreen;
6468 if (!ctx->Const.NativeIntegers) {
6469 struct ureg_dst temp = ureg_DECL_local_temporary(t->ureg);
6470 ureg_U2F( t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X), t->systemValues[i]);
6471 t->systemValues[i] = ureg_scalar(ureg_src(temp), 0);
6472 }
6473 }
6474
6475 if (procType == PIPE_SHADER_FRAGMENT &&
6476 semName == TGSI_SEMANTIC_POSITION)
6477 emit_wpos(st_context(ctx), t, proginfo, ureg,
6478 program->wpos_transform_const);
6479
6480 sysInputs &= ~(1 << i);
6481 }
6482 }
6483 }
6484
6485 t->array_sizes = program->array_sizes;
6486 t->input_decls = program->inputs;
6487 t->num_input_decls = program->num_inputs;
6488 t->output_decls = program->outputs;
6489 t->num_output_decls = program->num_outputs;
6490
6491 /* Emit constants and uniforms. TGSI uses a single index space for these,
6492 * so we put all the translated regs in t->constants.
6493 */
6494 if (proginfo->Parameters) {
6495 t->constants = (struct ureg_src *)
6496 calloc(proginfo->Parameters->NumParameters, sizeof(t->constants[0]));
6497 if (t->constants == NULL) {
6498 ret = PIPE_ERROR_OUT_OF_MEMORY;
6499 goto out;
6500 }
6501 t->num_constants = proginfo->Parameters->NumParameters;
6502
6503 for (i = 0; i < proginfo->Parameters->NumParameters; i++) {
6504 switch (proginfo->Parameters->Parameters[i].Type) {
6505 case PROGRAM_STATE_VAR:
6506 case PROGRAM_UNIFORM:
6507 t->constants[i] = ureg_DECL_constant(ureg, i);
6508 break;
6509
6510 /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect
6511 * addressing of the const buffer.
6512 * FIXME: Be smarter and recognize param arrays:
6513 * indirect addressing is only valid within the referenced
6514 * array.
6515 */
6516 case PROGRAM_CONSTANT:
6517 if (program->indirect_addr_consts)
6518 t->constants[i] = ureg_DECL_constant(ureg, i);
6519 else
6520 t->constants[i] = emit_immediate(t,
6521 proginfo->Parameters->ParameterValues[i],
6522 proginfo->Parameters->Parameters[i].DataType,
6523 4);
6524 break;
6525 default:
6526 break;
6527 }
6528 }
6529 }
6530
6531 for (i = 0; i < proginfo->info.num_ubos; i++) {
6532 unsigned size = proginfo->sh.UniformBlocks[i]->UniformBufferSize;
6533 unsigned num_const_vecs = (size + 15) / 16;
6534 unsigned first, last;
6535 assert(num_const_vecs > 0);
6536 first = 0;
6537 last = num_const_vecs > 0 ? num_const_vecs - 1 : 0;
6538 ureg_DECL_constant2D(t->ureg, first, last, i + 1);
6539 }
6540
6541 /* Emit immediate values.
6542 */
6543 t->immediates = (struct ureg_src *)
6544 calloc(program->num_immediates, sizeof(struct ureg_src));
6545 if (t->immediates == NULL) {
6546 ret = PIPE_ERROR_OUT_OF_MEMORY;
6547 goto out;
6548 }
6549 t->num_immediates = program->num_immediates;
6550
6551 i = 0;
6552 foreach_in_list(immediate_storage, imm, &program->immediates) {
6553 assert(i < program->num_immediates);
6554 t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size32);
6555 }
6556 assert(i == program->num_immediates);
6557
6558 /* texture samplers */
6559 for (i = 0; i < frag_const->MaxTextureImageUnits; i++) {
6560 if (program->samplers_used & (1u << i)) {
6561 enum tgsi_return_type type =
6562 st_translate_texture_type(program->sampler_types[i]);
6563
6564 t->samplers[i] = ureg_DECL_sampler(ureg, i);
6565
6566 ureg_DECL_sampler_view( ureg, i, program->sampler_targets[i],
6567 type, type, type, type );
6568 }
6569 }
6570
6571 /* Declare atomic and shader storage buffers. */
6572 {
6573 struct gl_program *prog = program->prog;
6574
6575 if (!st_context(ctx)->has_hw_atomics) {
6576 for (i = 0; i < prog->info.num_abos; i++) {
6577 unsigned index = prog->sh.AtomicBuffers[i]->Binding;
6578 assert(index < frag_const->MaxAtomicBuffers);
6579 t->buffers[index] = ureg_DECL_buffer(ureg, index, true);
6580 }
6581 } else {
6582 for (i = 0; i < program->num_atomics; i++) {
6583 struct hwatomic_decl *ainfo = &program->atomic_info[i];
6584 gl_uniform_storage *uni_storage = &prog->sh.data->UniformStorage[ainfo->location];
6585 int base = uni_storage->offset / ATOMIC_COUNTER_SIZE;
6586 ureg_DECL_hw_atomic(ureg, base, base + ainfo->size - 1, ainfo->binding,
6587 ainfo->array_id);
6588 }
6589 }
6590
6591 assert(prog->info.num_ssbos <= frag_const->MaxShaderStorageBlocks);
6592 for (i = 0; i < prog->info.num_ssbos; i++) {
6593 unsigned index = i;
6594 if (!st_context(ctx)->has_hw_atomics)
6595 index += frag_const->MaxAtomicBuffers;
6596
6597 t->buffers[index] = ureg_DECL_buffer(ureg, index, false);
6598 }
6599 }
6600
6601 if (program->use_shared_memory)
6602 t->shared_memory = ureg_DECL_memory(ureg, TGSI_MEMORY_TYPE_SHARED);
6603
6604 for (i = 0; i < program->shader->Program->info.num_images; i++) {
6605 if (program->images_used & (1 << i)) {
6606 t->images[i] = ureg_DECL_image(ureg, i,
6607 program->image_targets[i],
6608 program->image_formats[i],
6609 true, false);
6610 }
6611 }
6612
6613 /* Emit each instruction in turn:
6614 */
6615 foreach_in_list(glsl_to_tgsi_instruction, inst, &program->instructions)
6616 compile_tgsi_instruction(t, inst);
6617
6618 /* Set the next shader stage hint for VS and TES. */
6619 switch (procType) {
6620 case PIPE_SHADER_VERTEX:
6621 case PIPE_SHADER_TESS_EVAL:
6622 if (program->shader_program->SeparateShader)
6623 break;
6624
6625 for (i = program->shader->Stage+1; i <= MESA_SHADER_FRAGMENT; i++) {
6626 if (program->shader_program->_LinkedShaders[i]) {
6627 ureg_set_next_shader_processor(
6628 ureg, pipe_shader_type_from_mesa((gl_shader_stage)i));
6629 break;
6630 }
6631 }
6632 break;
6633 }
6634
6635 out:
6636 if (t) {
6637 free(t->arrays);
6638 free(t->temps);
6639 free(t->constants);
6640 t->num_constants = 0;
6641 free(t->immediates);
6642 t->num_immediates = 0;
6643 FREE(t);
6644 }
6645
6646 return ret;
6647 }
6648 /* ----------------------------- End TGSI code ------------------------------ */
6649
6650
6651 /**
6652 * Convert a shader's GLSL IR into a Mesa gl_program, although without
6653 * generating Mesa IR.
6654 */
6655 static struct gl_program *
6656 get_mesa_program_tgsi(struct gl_context *ctx,
6657 struct gl_shader_program *shader_program,
6658 struct gl_linked_shader *shader)
6659 {
6660 glsl_to_tgsi_visitor* v;
6661 struct gl_program *prog;
6662 struct gl_shader_compiler_options *options =
6663 &ctx->Const.ShaderCompilerOptions[shader->Stage];
6664 struct pipe_screen *pscreen = ctx->st->pipe->screen;
6665 enum pipe_shader_type ptarget = pipe_shader_type_from_mesa(shader->Stage);
6666 unsigned skip_merge_registers;
6667
6668 validate_ir_tree(shader->ir);
6669
6670 prog = shader->Program;
6671
6672 prog->Parameters = _mesa_new_parameter_list();
6673 v = new glsl_to_tgsi_visitor();
6674 v->ctx = ctx;
6675 v->prog = prog;
6676 v->shader_program = shader_program;
6677 v->shader = shader;
6678 v->options = options;
6679 v->native_integers = ctx->Const.NativeIntegers;
6680
6681 v->have_sqrt = pscreen->get_shader_param(pscreen, ptarget,
6682 PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED);
6683 v->have_fma = pscreen->get_shader_param(pscreen, ptarget,
6684 PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED);
6685 v->has_tex_txf_lz = pscreen->get_param(pscreen,
6686 PIPE_CAP_TGSI_TEX_TXF_LZ);
6687 v->need_uarl = !pscreen->get_param(pscreen, PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS);
6688
6689 v->variables = _mesa_hash_table_create(v->mem_ctx, _mesa_hash_pointer,
6690 _mesa_key_pointer_equal);
6691 skip_merge_registers =
6692 pscreen->get_shader_param(pscreen, ptarget,
6693 PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS);
6694
6695 _mesa_generate_parameters_list_for_uniforms(ctx, shader_program, shader,
6696 prog->Parameters);
6697
6698 /* Remove reads from output registers. */
6699 if (!pscreen->get_param(pscreen, PIPE_CAP_TGSI_CAN_READ_OUTPUTS))
6700 lower_output_reads(shader->Stage, shader->ir);
6701
6702 /* Emit intermediate IR for main(). */
6703 visit_exec_list(shader->ir, v);
6704
6705 #if 0
6706 /* Print out some information (for debugging purposes) used by the
6707 * optimization passes. */
6708 {
6709 int i;
6710 int *first_writes = ralloc_array(v->mem_ctx, int, v->next_temp);
6711 int *first_reads = ralloc_array(v->mem_ctx, int, v->next_temp);
6712 int *last_writes = ralloc_array(v->mem_ctx, int, v->next_temp);
6713 int *last_reads = ralloc_array(v->mem_ctx, int, v->next_temp);
6714
6715 for (i = 0; i < v->next_temp; i++) {
6716 first_writes[i] = -1;
6717 first_reads[i] = -1;
6718 last_writes[i] = -1;
6719 last_reads[i] = -1;
6720 }
6721 v->get_first_temp_read(first_reads);
6722 v->get_last_temp_read_first_temp_write(last_reads, first_writes);
6723 v->get_last_temp_write(last_writes);
6724 for (i = 0; i < v->next_temp; i++)
6725 printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, first_reads[i],
6726 first_writes[i],
6727 last_reads[i],
6728 last_writes[i]);
6729 ralloc_free(first_writes);
6730 ralloc_free(first_reads);
6731 ralloc_free(last_writes);
6732 ralloc_free(last_reads);
6733 }
6734 #endif
6735
6736 /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
6737 v->simplify_cmp();
6738 v->copy_propagate();
6739
6740 while (v->eliminate_dead_code());
6741
6742 v->merge_two_dsts();
6743 if (!skip_merge_registers)
6744 v->merge_registers();
6745 v->renumber_registers();
6746
6747 /* Write the END instruction. */
6748 v->emit_asm(NULL, TGSI_OPCODE_END);
6749
6750 if (ctx->_Shader->Flags & GLSL_DUMP) {
6751 _mesa_log("\n");
6752 _mesa_log("GLSL IR for linked %s program %d:\n",
6753 _mesa_shader_stage_to_string(shader->Stage),
6754 shader_program->Name);
6755 _mesa_print_ir(_mesa_get_log_file(), shader->ir, NULL);
6756 _mesa_log("\n\n");
6757 }
6758
6759 do_set_program_inouts(shader->ir, prog, shader->Stage);
6760 _mesa_copy_linked_program_data(shader_program, shader);
6761 shrink_array_declarations(v->inputs, v->num_inputs,
6762 &prog->info.inputs_read,
6763 prog->info.double_inputs_read,
6764 &prog->info.patch_inputs_read);
6765 shrink_array_declarations(v->outputs, v->num_outputs,
6766 &prog->info.outputs_written, 0ULL,
6767 &prog->info.patch_outputs_written);
6768 count_resources(v, prog);
6769
6770 /* The GLSL IR won't be needed anymore. */
6771 ralloc_free(shader->ir);
6772 shader->ir = NULL;
6773
6774 /* This must be done before the uniform storage is associated. */
6775 if (shader->Stage == MESA_SHADER_FRAGMENT &&
6776 (prog->info.inputs_read & VARYING_BIT_POS ||
6777 prog->info.system_values_read & (1 << SYSTEM_VALUE_FRAG_COORD))) {
6778 static const gl_state_index wposTransformState[STATE_LENGTH] = {
6779 STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM
6780 };
6781
6782 v->wpos_transform_const = _mesa_add_state_reference(prog->Parameters,
6783 wposTransformState);
6784 }
6785
6786 /* Avoid reallocation of the program parameter list, because the uniform
6787 * storage is only associated with the original parameter list.
6788 * This should be enough for Bitmap and DrawPixels constants.
6789 */
6790 _mesa_reserve_parameter_storage(prog->Parameters, 8);
6791
6792 /* This has to be done last. Any operation the can cause
6793 * prog->ParameterValues to get reallocated (e.g., anything that adds a
6794 * program constant) has to happen before creating this linkage.
6795 */
6796 _mesa_associate_uniform_storage(ctx, shader_program, prog, true);
6797 if (!shader_program->data->LinkStatus) {
6798 free_glsl_to_tgsi_visitor(v);
6799 _mesa_reference_program(ctx, &shader->Program, NULL);
6800 return NULL;
6801 }
6802
6803 struct st_vertex_program *stvp;
6804 struct st_fragment_program *stfp;
6805 struct st_common_program *stp;
6806 struct st_compute_program *stcp;
6807
6808 switch (shader->Stage) {
6809 case MESA_SHADER_VERTEX:
6810 stvp = (struct st_vertex_program *)prog;
6811 stvp->glsl_to_tgsi = v;
6812 break;
6813 case MESA_SHADER_FRAGMENT:
6814 stfp = (struct st_fragment_program *)prog;
6815 stfp->glsl_to_tgsi = v;
6816 break;
6817 case MESA_SHADER_TESS_CTRL:
6818 case MESA_SHADER_TESS_EVAL:
6819 case MESA_SHADER_GEOMETRY:
6820 stp = st_common_program(prog);
6821 stp->glsl_to_tgsi = v;
6822 break;
6823 case MESA_SHADER_COMPUTE:
6824 stcp = (struct st_compute_program *)prog;
6825 stcp->glsl_to_tgsi = v;
6826 break;
6827 default:
6828 assert(!"should not be reached");
6829 return NULL;
6830 }
6831
6832 return prog;
6833 }
6834
6835 /* See if there are unsupported control flow statements. */
6836 class ir_control_flow_info_visitor : public ir_hierarchical_visitor {
6837 private:
6838 const struct gl_shader_compiler_options *options;
6839 public:
6840 ir_control_flow_info_visitor(const struct gl_shader_compiler_options *options)
6841 : options(options),
6842 unsupported(false)
6843 {
6844 }
6845
6846 virtual ir_visitor_status visit_enter(ir_function *ir)
6847 {
6848 /* Other functions are skipped (same as glsl_to_tgsi). */
6849 if (strcmp(ir->name, "main") == 0)
6850 return visit_continue;
6851
6852 return visit_continue_with_parent;
6853 }
6854
6855 virtual ir_visitor_status visit_enter(ir_call *ir)
6856 {
6857 if (!ir->callee->is_intrinsic()) {
6858 unsupported = true; /* it's a function call */
6859 return visit_stop;
6860 }
6861 return visit_continue;
6862 }
6863
6864 virtual ir_visitor_status visit_enter(ir_return *ir)
6865 {
6866 if (options->EmitNoMainReturn) {
6867 unsupported = true;
6868 return visit_stop;
6869 }
6870 return visit_continue;
6871 }
6872
6873 bool unsupported;
6874 };
6875
6876 static bool
6877 has_unsupported_control_flow(exec_list *ir,
6878 const struct gl_shader_compiler_options *options)
6879 {
6880 ir_control_flow_info_visitor visitor(options);
6881 visit_list_elements(&visitor, ir);
6882 return visitor.unsupported;
6883 }
6884
6885 extern "C" {
6886
6887 /**
6888 * Link a shader.
6889 * Called via ctx->Driver.LinkShader()
6890 * This actually involves converting GLSL IR into an intermediate TGSI-like IR
6891 * with code lowering and other optimizations.
6892 */
6893 GLboolean
6894 st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
6895 {
6896 /* Return early if we are loading the shader from on-disk cache */
6897 if (st_load_tgsi_from_disk_cache(ctx, prog)) {
6898 return GL_TRUE;
6899 }
6900
6901 struct pipe_screen *pscreen = ctx->st->pipe->screen;
6902 assert(prog->data->LinkStatus);
6903
6904 bool use_nir = false;
6905 for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
6906 if (prog->_LinkedShaders[i] == NULL)
6907 continue;
6908
6909 struct gl_linked_shader *shader = prog->_LinkedShaders[i];
6910 exec_list *ir = shader->ir;
6911 gl_shader_stage stage = shader->Stage;
6912 const struct gl_shader_compiler_options *options =
6913 &ctx->Const.ShaderCompilerOptions[stage];
6914 enum pipe_shader_type ptarget = pipe_shader_type_from_mesa(stage);
6915 bool have_dround = pscreen->get_shader_param(pscreen, ptarget,
6916 PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED);
6917 bool have_dfrexp = pscreen->get_shader_param(pscreen, ptarget,
6918 PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED);
6919 bool have_ldexp = pscreen->get_shader_param(pscreen, ptarget,
6920 PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED);
6921 unsigned if_threshold = pscreen->get_shader_param(pscreen, ptarget,
6922 PIPE_SHADER_CAP_LOWER_IF_THRESHOLD);
6923
6924 enum pipe_shader_ir preferred_ir = (enum pipe_shader_ir)
6925 pscreen->get_shader_param(pscreen, ptarget,
6926 PIPE_SHADER_CAP_PREFERRED_IR);
6927 if (preferred_ir == PIPE_SHADER_IR_NIR)
6928 use_nir = true;
6929
6930 /* If there are forms of indirect addressing that the driver
6931 * cannot handle, perform the lowering pass.
6932 */
6933 if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput ||
6934 options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) {
6935 lower_variable_index_to_cond_assign(stage, ir,
6936 options->EmitNoIndirectInput,
6937 options->EmitNoIndirectOutput,
6938 options->EmitNoIndirectTemp,
6939 options->EmitNoIndirectUniform);
6940 }
6941
6942 if (!pscreen->get_param(pscreen, PIPE_CAP_INT64_DIVMOD))
6943 lower_64bit_integer_instructions(ir, DIV64 | MOD64);
6944
6945 if (ctx->Extensions.ARB_shading_language_packing) {
6946 unsigned lower_inst = LOWER_PACK_SNORM_2x16 |
6947 LOWER_UNPACK_SNORM_2x16 |
6948 LOWER_PACK_UNORM_2x16 |
6949 LOWER_UNPACK_UNORM_2x16 |
6950 LOWER_PACK_SNORM_4x8 |
6951 LOWER_UNPACK_SNORM_4x8 |
6952 LOWER_UNPACK_UNORM_4x8 |
6953 LOWER_PACK_UNORM_4x8;
6954
6955 if (ctx->Extensions.ARB_gpu_shader5)
6956 lower_inst |= LOWER_PACK_USE_BFI |
6957 LOWER_PACK_USE_BFE;
6958 if (!ctx->st->has_half_float_packing)
6959 lower_inst |= LOWER_PACK_HALF_2x16 |
6960 LOWER_UNPACK_HALF_2x16;
6961
6962 lower_packing_builtins(ir, lower_inst);
6963 }
6964
6965 if (!pscreen->get_param(pscreen, PIPE_CAP_TEXTURE_GATHER_OFFSETS))
6966 lower_offset_arrays(ir);
6967 do_mat_op_to_vec(ir);
6968
6969 if (stage == MESA_SHADER_FRAGMENT)
6970 lower_blend_equation_advanced(shader);
6971
6972 lower_instructions(ir,
6973 MOD_TO_FLOOR |
6974 FDIV_TO_MUL_RCP |
6975 EXP_TO_EXP2 |
6976 LOG_TO_LOG2 |
6977 (have_ldexp ? 0 : LDEXP_TO_ARITH) |
6978 (have_dfrexp ? 0 : DFREXP_DLDEXP_TO_ARITH) |
6979 CARRY_TO_ARITH |
6980 BORROW_TO_ARITH |
6981 (have_dround ? 0 : DOPS_TO_DFRAC) |
6982 (options->EmitNoPow ? POW_TO_EXP2 : 0) |
6983 (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0) |
6984 (options->EmitNoSat ? SAT_TO_CLAMP : 0) |
6985 (ctx->Const.ForceGLSLAbsSqrt ? SQRT_TO_ABS_SQRT : 0) |
6986 /* Assume that if ARB_gpu_shader5 is not supported
6987 * then all of the extended integer functions need
6988 * lowering. It may be necessary to add some caps
6989 * for individual instructions.
6990 */
6991 (!ctx->Extensions.ARB_gpu_shader5
6992 ? BIT_COUNT_TO_MATH |
6993 EXTRACT_TO_SHIFTS |
6994 INSERT_TO_SHIFTS |
6995 REVERSE_TO_SHIFTS |
6996 FIND_LSB_TO_FLOAT_CAST |
6997 FIND_MSB_TO_FLOAT_CAST |
6998 IMUL_HIGH_TO_MUL
6999 : 0));
7000
7001 do_vec_index_to_cond_assign(ir);
7002 lower_vector_insert(ir, true);
7003 lower_quadop_vector(ir, false);
7004 lower_noise(ir);
7005 if (options->MaxIfDepth == 0) {
7006 lower_discard(ir);
7007 }
7008
7009 if (ctx->Const.GLSLOptimizeConservatively) {
7010 /* Do it once and repeat only if there's unsupported control flow. */
7011 do {
7012 do_common_optimization(ir, true, true, options,
7013 ctx->Const.NativeIntegers);
7014 lower_if_to_cond_assign((gl_shader_stage)i, ir,
7015 options->MaxIfDepth, if_threshold);
7016 } while (has_unsupported_control_flow(ir, options));
7017 } else {
7018 /* Repeat it until it stops making changes. */
7019 bool progress;
7020 do {
7021 progress = do_common_optimization(ir, true, true, options,
7022 ctx->Const.NativeIntegers);
7023 progress |= lower_if_to_cond_assign((gl_shader_stage)i, ir,
7024 options->MaxIfDepth, if_threshold);
7025 } while (progress);
7026 }
7027
7028 validate_ir_tree(ir);
7029 }
7030
7031 build_program_resource_list(ctx, prog);
7032
7033 if (use_nir)
7034 return st_link_nir(ctx, prog);
7035
7036 for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
7037 struct gl_linked_shader *shader = prog->_LinkedShaders[i];
7038 if (shader == NULL)
7039 continue;
7040
7041 struct gl_program *linked_prog =
7042 get_mesa_program_tgsi(ctx, prog, shader);
7043 st_set_prog_affected_state_flags(linked_prog);
7044
7045 if (linked_prog) {
7046 if (!ctx->Driver.ProgramStringNotify(ctx,
7047 _mesa_shader_stage_to_program(i),
7048 linked_prog)) {
7049 _mesa_reference_program(ctx, &shader->Program, NULL);
7050 return GL_FALSE;
7051 }
7052 }
7053 }
7054
7055 return GL_TRUE;
7056 }
7057
7058 void
7059 st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi,
7060 const ubyte outputMapping[],
7061 struct pipe_stream_output_info *so)
7062 {
7063 if (!glsl_to_tgsi->shader_program->last_vert_prog)
7064 return;
7065
7066 struct gl_transform_feedback_info *info =
7067 glsl_to_tgsi->shader_program->last_vert_prog->sh.LinkedTransformFeedback;
7068 st_translate_stream_output_info2(info, outputMapping, so);
7069 }
7070
7071 void
7072 st_translate_stream_output_info2(struct gl_transform_feedback_info *info,
7073 const ubyte outputMapping[],
7074 struct pipe_stream_output_info *so)
7075 {
7076 unsigned i;
7077
7078 for (i = 0; i < info->NumOutputs; i++) {
7079 so->output[i].register_index =
7080 outputMapping[info->Outputs[i].OutputRegister];
7081 so->output[i].start_component = info->Outputs[i].ComponentOffset;
7082 so->output[i].num_components = info->Outputs[i].NumComponents;
7083 so->output[i].output_buffer = info->Outputs[i].OutputBuffer;
7084 so->output[i].dst_offset = info->Outputs[i].DstOffset;
7085 so->output[i].stream = info->Outputs[i].StreamId;
7086 }
7087
7088 for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
7089 so->stride[i] = info->Buffers[i].Stride;
7090 }
7091 so->num_outputs = info->NumOutputs;
7092 }
7093
7094 } /* extern "C" */