nir: add support for user defined select control
[mesa.git] / src / compiler / nir / nir.c
1 /*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Connor Abbott (cwabbott0@gmail.com)
25 *
26 */
27
28 #include "nir.h"
29 #include "nir_control_flow_private.h"
30 #include "util/half_float.h"
31 #include <limits.h>
32 #include <assert.h>
33 #include <math.h>
34 #include "util/u_math.h"
35
36 #include "main/menums.h" /* BITFIELD64_MASK */
37
38 nir_shader *
39 nir_shader_create(void *mem_ctx,
40 gl_shader_stage stage,
41 const nir_shader_compiler_options *options,
42 shader_info *si)
43 {
44 nir_shader *shader = rzalloc(mem_ctx, nir_shader);
45
46 exec_list_make_empty(&shader->uniforms);
47 exec_list_make_empty(&shader->inputs);
48 exec_list_make_empty(&shader->outputs);
49 exec_list_make_empty(&shader->shared);
50
51 shader->options = options;
52
53 if (si) {
54 assert(si->stage == stage);
55 shader->info = *si;
56 } else {
57 shader->info.stage = stage;
58 }
59
60 exec_list_make_empty(&shader->functions);
61 exec_list_make_empty(&shader->registers);
62 exec_list_make_empty(&shader->globals);
63 exec_list_make_empty(&shader->system_values);
64 shader->reg_alloc = 0;
65
66 shader->num_inputs = 0;
67 shader->num_outputs = 0;
68 shader->num_uniforms = 0;
69 shader->num_shared = 0;
70
71 return shader;
72 }
73
74 static nir_register *
75 reg_create(void *mem_ctx, struct exec_list *list)
76 {
77 nir_register *reg = ralloc(mem_ctx, nir_register);
78
79 list_inithead(&reg->uses);
80 list_inithead(&reg->defs);
81 list_inithead(&reg->if_uses);
82
83 reg->num_components = 0;
84 reg->bit_size = 32;
85 reg->num_array_elems = 0;
86 reg->is_packed = false;
87 reg->name = NULL;
88
89 exec_list_push_tail(list, &reg->node);
90
91 return reg;
92 }
93
94 nir_register *
95 nir_global_reg_create(nir_shader *shader)
96 {
97 nir_register *reg = reg_create(shader, &shader->registers);
98 reg->index = shader->reg_alloc++;
99 reg->is_global = true;
100
101 return reg;
102 }
103
104 nir_register *
105 nir_local_reg_create(nir_function_impl *impl)
106 {
107 nir_register *reg = reg_create(ralloc_parent(impl), &impl->registers);
108 reg->index = impl->reg_alloc++;
109 reg->is_global = false;
110
111 return reg;
112 }
113
114 void
115 nir_reg_remove(nir_register *reg)
116 {
117 exec_node_remove(&reg->node);
118 }
119
120 void
121 nir_shader_add_variable(nir_shader *shader, nir_variable *var)
122 {
123 switch (var->data.mode) {
124 case nir_var_all:
125 assert(!"invalid mode");
126 break;
127
128 case nir_var_function_temp:
129 assert(!"nir_shader_add_variable cannot be used for local variables");
130 break;
131
132 case nir_var_shader_temp:
133 exec_list_push_tail(&shader->globals, &var->node);
134 break;
135
136 case nir_var_shader_in:
137 exec_list_push_tail(&shader->inputs, &var->node);
138 break;
139
140 case nir_var_shader_out:
141 exec_list_push_tail(&shader->outputs, &var->node);
142 break;
143
144 case nir_var_uniform:
145 case nir_var_mem_ubo:
146 case nir_var_mem_ssbo:
147 exec_list_push_tail(&shader->uniforms, &var->node);
148 break;
149
150 case nir_var_mem_shared:
151 assert(gl_shader_stage_is_compute(shader->info.stage));
152 exec_list_push_tail(&shader->shared, &var->node);
153 break;
154
155 case nir_var_mem_global:
156 assert(!"nir_shader_add_variable cannot be used for global memory");
157 break;
158
159 case nir_var_system_value:
160 exec_list_push_tail(&shader->system_values, &var->node);
161 break;
162 }
163 }
164
165 nir_variable *
166 nir_variable_create(nir_shader *shader, nir_variable_mode mode,
167 const struct glsl_type *type, const char *name)
168 {
169 nir_variable *var = rzalloc(shader, nir_variable);
170 var->name = ralloc_strdup(var, name);
171 var->type = type;
172 var->data.mode = mode;
173 var->data.how_declared = nir_var_declared_normally;
174
175 if ((mode == nir_var_shader_in &&
176 shader->info.stage != MESA_SHADER_VERTEX) ||
177 (mode == nir_var_shader_out &&
178 shader->info.stage != MESA_SHADER_FRAGMENT))
179 var->data.interpolation = INTERP_MODE_SMOOTH;
180
181 if (mode == nir_var_shader_in || mode == nir_var_uniform)
182 var->data.read_only = true;
183
184 nir_shader_add_variable(shader, var);
185
186 return var;
187 }
188
189 nir_variable *
190 nir_local_variable_create(nir_function_impl *impl,
191 const struct glsl_type *type, const char *name)
192 {
193 nir_variable *var = rzalloc(impl->function->shader, nir_variable);
194 var->name = ralloc_strdup(var, name);
195 var->type = type;
196 var->data.mode = nir_var_function_temp;
197
198 nir_function_impl_add_variable(impl, var);
199
200 return var;
201 }
202
203 nir_function *
204 nir_function_create(nir_shader *shader, const char *name)
205 {
206 nir_function *func = ralloc(shader, nir_function);
207
208 exec_list_push_tail(&shader->functions, &func->node);
209
210 func->name = ralloc_strdup(func, name);
211 func->shader = shader;
212 func->num_params = 0;
213 func->params = NULL;
214 func->impl = NULL;
215 func->is_entrypoint = false;
216
217 return func;
218 }
219
220 /* NOTE: if the instruction you are copying a src to is already added
221 * to the IR, use nir_instr_rewrite_src() instead.
222 */
223 void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx)
224 {
225 dest->is_ssa = src->is_ssa;
226 if (src->is_ssa) {
227 dest->ssa = src->ssa;
228 } else {
229 dest->reg.base_offset = src->reg.base_offset;
230 dest->reg.reg = src->reg.reg;
231 if (src->reg.indirect) {
232 dest->reg.indirect = ralloc(mem_ctx, nir_src);
233 nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx);
234 } else {
235 dest->reg.indirect = NULL;
236 }
237 }
238 }
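/* Editorial sketch, not part of the upstream file: nir_src_copy() fills in
 * the sources of an instruction that has not been inserted into the IR yet;
 * once it is inserted, nir_instr_rewrite_src() must be used so the use lists
 * stay consistent.  Assuming a nir_builder 'b' and existing nir_src values
 * 'old_src' and 'new_src':
 *
 *    nir_alu_instr *add = nir_alu_instr_create(b->shader, nir_op_fadd);
 *    nir_src_copy(&add->src[0].src, &old_src, add);    // not in the IR yet
 *    nir_src_copy(&add->src[1].src, &old_src, add);
 *    nir_builder_instr_insert(b, &add->instr);
 *    nir_instr_rewrite_src(&add->instr, &add->src[0].src,
 *                          new_src);                   // now it is
 */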
239
240 void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr)
241 {
242 /* Copying an SSA definition makes no sense whatsoever. */
243 assert(!src->is_ssa);
244
245 dest->is_ssa = false;
246
247 dest->reg.base_offset = src->reg.base_offset;
248 dest->reg.reg = src->reg.reg;
249 if (src->reg.indirect) {
250 dest->reg.indirect = ralloc(instr, nir_src);
251 nir_src_copy(dest->reg.indirect, src->reg.indirect, instr);
252 } else {
253 dest->reg.indirect = NULL;
254 }
255 }
256
257 void
258 nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
259 nir_alu_instr *instr)
260 {
261 nir_src_copy(&dest->src, &src->src, &instr->instr);
262 dest->abs = src->abs;
263 dest->negate = src->negate;
264 for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
265 dest->swizzle[i] = src->swizzle[i];
266 }
267
268 void
269 nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
270 nir_alu_instr *instr)
271 {
272 nir_dest_copy(&dest->dest, &src->dest, &instr->instr);
273 dest->write_mask = src->write_mask;
274 dest->saturate = src->saturate;
275 }
276
277
278 static void
279 cf_init(nir_cf_node *node, nir_cf_node_type type)
280 {
281 exec_node_init(&node->node);
282 node->parent = NULL;
283 node->type = type;
284 }
285
286 nir_function_impl *
287 nir_function_impl_create_bare(nir_shader *shader)
288 {
289 nir_function_impl *impl = ralloc(shader, nir_function_impl);
290
291 impl->function = NULL;
292
293 cf_init(&impl->cf_node, nir_cf_node_function);
294
295 exec_list_make_empty(&impl->body);
296 exec_list_make_empty(&impl->registers);
297 exec_list_make_empty(&impl->locals);
298 impl->reg_alloc = 0;
299 impl->ssa_alloc = 0;
300 impl->valid_metadata = nir_metadata_none;
301
302 /* create start & end blocks */
303 nir_block *start_block = nir_block_create(shader);
304 nir_block *end_block = nir_block_create(shader);
305 start_block->cf_node.parent = &impl->cf_node;
306 end_block->cf_node.parent = &impl->cf_node;
307 impl->end_block = end_block;
308
309 exec_list_push_tail(&impl->body, &start_block->cf_node.node);
310
311 start_block->successors[0] = end_block;
312 _mesa_set_add(end_block->predecessors, start_block);
313 return impl;
314 }
315
316 nir_function_impl *
317 nir_function_impl_create(nir_function *function)
318 {
319 assert(function->impl == NULL);
320
321 nir_function_impl *impl = nir_function_impl_create_bare(function->shader);
322
323 function->impl = impl;
324 impl->function = function;
325
326 return impl;
327 }
328
329 nir_block *
330 nir_block_create(nir_shader *shader)
331 {
332 nir_block *block = rzalloc(shader, nir_block);
333
334 cf_init(&block->cf_node, nir_cf_node_block);
335
336 block->successors[0] = block->successors[1] = NULL;
337 block->predecessors = _mesa_pointer_set_create(block);
338 block->imm_dom = NULL;
339 /* XXX maybe it would be worth it to defer allocation? This
340 * way it doesn't get allocated for shader refs that never run
341 * nir_calc_dominance? For example, state-tracker creates an
342 * initial IR, clones that, runs appropriate lowering pass, passes
343 * to driver which does common lowering/opt, and then stores ref
 344  * which is later used to do state-specific lowering and further
345 * opt. Do any of the references not need dominance metadata?
346 */
347 block->dom_frontier = _mesa_pointer_set_create(block);
348
349 exec_list_make_empty(&block->instr_list);
350
351 return block;
352 }
353
354 static inline void
355 src_init(nir_src *src)
356 {
357 src->is_ssa = false;
358 src->reg.reg = NULL;
359 src->reg.indirect = NULL;
360 src->reg.base_offset = 0;
361 }
362
363 nir_if *
364 nir_if_create(nir_shader *shader)
365 {
366 nir_if *if_stmt = ralloc(shader, nir_if);
367
368 if_stmt->control = nir_selection_control_none;
369
370 cf_init(&if_stmt->cf_node, nir_cf_node_if);
371 src_init(&if_stmt->condition);
372
373 nir_block *then = nir_block_create(shader);
374 exec_list_make_empty(&if_stmt->then_list);
375 exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node);
376 then->cf_node.parent = &if_stmt->cf_node;
377
378 nir_block *else_stmt = nir_block_create(shader);
379 exec_list_make_empty(&if_stmt->else_list);
380 exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node);
381 else_stmt->cf_node.parent = &if_stmt->cf_node;
382
383 return if_stmt;
384 }
385
386 nir_loop *
387 nir_loop_create(nir_shader *shader)
388 {
389 nir_loop *loop = rzalloc(shader, nir_loop);
390
391 cf_init(&loop->cf_node, nir_cf_node_loop);
392
393 nir_block *body = nir_block_create(shader);
394 exec_list_make_empty(&loop->body);
395 exec_list_push_tail(&loop->body, &body->cf_node.node);
396 body->cf_node.parent = &loop->cf_node;
397
398 body->successors[0] = body;
399 _mesa_set_add(body->predecessors, body);
400
401 return loop;
402 }
403
404 static void
405 instr_init(nir_instr *instr, nir_instr_type type)
406 {
407 instr->type = type;
408 instr->block = NULL;
409 exec_node_init(&instr->node);
410 }
411
412 static void
413 dest_init(nir_dest *dest)
414 {
415 dest->is_ssa = false;
416 dest->reg.reg = NULL;
417 dest->reg.indirect = NULL;
418 dest->reg.base_offset = 0;
419 }
420
421 static void
422 alu_dest_init(nir_alu_dest *dest)
423 {
424 dest_init(&dest->dest);
425 dest->saturate = false;
426 dest->write_mask = 0xf;
427 }
428
429 static void
430 alu_src_init(nir_alu_src *src)
431 {
432 src_init(&src->src);
433 src->abs = src->negate = false;
434 for (int i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i)
435 src->swizzle[i] = i;
436 }
437
438 nir_alu_instr *
439 nir_alu_instr_create(nir_shader *shader, nir_op op)
440 {
441 unsigned num_srcs = nir_op_infos[op].num_inputs;
442 /* TODO: don't use rzalloc */
443 nir_alu_instr *instr =
444 rzalloc_size(shader,
445 sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));
446
447 instr_init(&instr->instr, nir_instr_type_alu);
448 instr->op = op;
449 alu_dest_init(&instr->dest);
450 for (unsigned i = 0; i < num_srcs; i++)
451 alu_src_init(&instr->src[i]);
452
453 return instr;
454 }
455
456 nir_deref_instr *
457 nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
458 {
459 nir_deref_instr *instr =
460 rzalloc_size(shader, sizeof(nir_deref_instr));
461
462 instr_init(&instr->instr, nir_instr_type_deref);
463
464 instr->deref_type = deref_type;
465 if (deref_type != nir_deref_type_var)
466 src_init(&instr->parent);
467
468 if (deref_type == nir_deref_type_array ||
469 deref_type == nir_deref_type_ptr_as_array)
470 src_init(&instr->arr.index);
471
472 dest_init(&instr->dest);
473
474 return instr;
475 }
476
477 nir_jump_instr *
478 nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
479 {
480 nir_jump_instr *instr = ralloc(shader, nir_jump_instr);
481 instr_init(&instr->instr, nir_instr_type_jump);
482 instr->type = type;
483 return instr;
484 }
485
486 nir_load_const_instr *
487 nir_load_const_instr_create(nir_shader *shader, unsigned num_components,
488 unsigned bit_size)
489 {
490 nir_load_const_instr *instr = rzalloc(shader, nir_load_const_instr);
491 instr_init(&instr->instr, nir_instr_type_load_const);
492
493 nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size, NULL);
494
495 return instr;
496 }
497
498 nir_intrinsic_instr *
499 nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
500 {
501 unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
502 /* TODO: don't use rzalloc */
503 nir_intrinsic_instr *instr =
504 rzalloc_size(shader,
505 sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));
506
507 instr_init(&instr->instr, nir_instr_type_intrinsic);
508 instr->intrinsic = op;
509
510 if (nir_intrinsic_infos[op].has_dest)
511 dest_init(&instr->dest);
512
513 for (unsigned i = 0; i < num_srcs; i++)
514 src_init(&instr->src[i]);
515
516 return instr;
517 }
518
519 nir_call_instr *
520 nir_call_instr_create(nir_shader *shader, nir_function *callee)
521 {
522 const unsigned num_params = callee->num_params;
523 nir_call_instr *instr =
524 rzalloc_size(shader, sizeof(*instr) +
525 num_params * sizeof(instr->params[0]));
526
527 instr_init(&instr->instr, nir_instr_type_call);
528 instr->callee = callee;
529 instr->num_params = num_params;
530 for (unsigned i = 0; i < num_params; i++)
531 src_init(&instr->params[i]);
532
533 return instr;
534 }
535
536 static int8_t default_tg4_offsets[4][2] =
537 {
538 { 0, 1 },
539 { 1, 1 },
540 { 1, 0 },
541 { 0, 0 },
542 };
543
544 nir_tex_instr *
545 nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
546 {
547 nir_tex_instr *instr = rzalloc(shader, nir_tex_instr);
548 instr_init(&instr->instr, nir_instr_type_tex);
549
550 dest_init(&instr->dest);
551
552 instr->num_srcs = num_srcs;
553 instr->src = ralloc_array(instr, nir_tex_src, num_srcs);
554 for (unsigned i = 0; i < num_srcs; i++)
555 src_init(&instr->src[i].src);
556
557 instr->texture_index = 0;
558 instr->texture_array_size = 0;
559 instr->sampler_index = 0;
560 memcpy(instr->tg4_offsets, default_tg4_offsets, sizeof(instr->tg4_offsets));
561
562 return instr;
563 }
564
565 void
566 nir_tex_instr_add_src(nir_tex_instr *tex,
567 nir_tex_src_type src_type,
568 nir_src src)
569 {
570 nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src,
571 tex->num_srcs + 1);
572
573 for (unsigned i = 0; i < tex->num_srcs; i++) {
574 new_srcs[i].src_type = tex->src[i].src_type;
575 nir_instr_move_src(&tex->instr, &new_srcs[i].src,
576 &tex->src[i].src);
577 }
578
579 ralloc_free(tex->src);
580 tex->src = new_srcs;
581
582 tex->src[tex->num_srcs].src_type = src_type;
583 nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs].src, src);
584 tex->num_srcs++;
585 }
586
587 void
588 nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx)
589 {
590 assert(src_idx < tex->num_srcs);
591
592 /* First rewrite the source to NIR_SRC_INIT */
593 nir_instr_rewrite_src(&tex->instr, &tex->src[src_idx].src, NIR_SRC_INIT);
594
595 /* Now, move all of the other sources down */
596 for (unsigned i = src_idx + 1; i < tex->num_srcs; i++) {
597 tex->src[i-1].src_type = tex->src[i].src_type;
598 nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src);
599 }
600 tex->num_srcs--;
601 }
602
603 bool
604 nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex)
605 {
606 if (tex->op != nir_texop_tg4)
607 return false;
608 return memcmp(tex->tg4_offsets, default_tg4_offsets,
609 sizeof(tex->tg4_offsets)) != 0;
610 }
611
612 nir_phi_instr *
613 nir_phi_instr_create(nir_shader *shader)
614 {
615 nir_phi_instr *instr = ralloc(shader, nir_phi_instr);
616 instr_init(&instr->instr, nir_instr_type_phi);
617
618 dest_init(&instr->dest);
619 exec_list_make_empty(&instr->srcs);
620 return instr;
621 }
622
623 nir_parallel_copy_instr *
624 nir_parallel_copy_instr_create(nir_shader *shader)
625 {
626 nir_parallel_copy_instr *instr = ralloc(shader, nir_parallel_copy_instr);
627 instr_init(&instr->instr, nir_instr_type_parallel_copy);
628
629 exec_list_make_empty(&instr->entries);
630
631 return instr;
632 }
633
634 nir_ssa_undef_instr *
635 nir_ssa_undef_instr_create(nir_shader *shader,
636 unsigned num_components,
637 unsigned bit_size)
638 {
639 nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr);
640 instr_init(&instr->instr, nir_instr_type_ssa_undef);
641
642 nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size, NULL);
643
644 return instr;
645 }
646
647 static nir_const_value
648 const_value_float(double d, unsigned bit_size)
649 {
650 nir_const_value v;
651 switch (bit_size) {
652 case 16: v.u16[0] = _mesa_float_to_half(d); break;
653 case 32: v.f32[0] = d; break;
654 case 64: v.f64[0] = d; break;
655 default:
656 unreachable("Invalid bit size");
657 }
658 return v;
659 }
660
661 static nir_const_value
662 const_value_int(int64_t i, unsigned bit_size)
663 {
664 nir_const_value v;
665 switch (bit_size) {
666 case 1: v.b[0] = i & 1; break;
667 case 8: v.i8[0] = i; break;
668 case 16: v.i16[0] = i; break;
669 case 32: v.i32[0] = i; break;
670 case 64: v.i64[0] = i; break;
671 default:
672 unreachable("Invalid bit size");
673 }
674 return v;
675 }
676
677 nir_const_value
678 nir_alu_binop_identity(nir_op binop, unsigned bit_size)
679 {
680 const int64_t max_int = (1ull << (bit_size - 1)) - 1;
681 const int64_t min_int = -max_int - 1;
682 switch (binop) {
683 case nir_op_iadd:
684 return const_value_int(0, bit_size);
685 case nir_op_fadd:
686 return const_value_float(0, bit_size);
687 case nir_op_imul:
688 return const_value_int(1, bit_size);
689 case nir_op_fmul:
690 return const_value_float(1, bit_size);
691 case nir_op_imin:
692 return const_value_int(max_int, bit_size);
693 case nir_op_umin:
694 return const_value_int(~0ull, bit_size);
695 case nir_op_fmin:
696 return const_value_float(INFINITY, bit_size);
697 case nir_op_imax:
698 return const_value_int(min_int, bit_size);
699 case nir_op_umax:
700 return const_value_int(0, bit_size);
701 case nir_op_fmax:
702 return const_value_float(-INFINITY, bit_size);
703 case nir_op_iand:
704 return const_value_int(~0ull, bit_size);
705 case nir_op_ior:
706 return const_value_int(0, bit_size);
707 case nir_op_ixor:
708 return const_value_int(0, bit_size);
709 default:
710 unreachable("Invalid reduction operation");
711 }
712 }
713
714 nir_function_impl *
715 nir_cf_node_get_function(nir_cf_node *node)
716 {
717 while (node->type != nir_cf_node_function) {
718 node = node->parent;
719 }
720
721 return nir_cf_node_as_function(node);
722 }
723
 724 /* Reduces a cursor by trying to convert it to an "after" form and to move
 725  * up to block granularity when possible.
726 */
727 static nir_cursor
728 reduce_cursor(nir_cursor cursor)
729 {
730 switch (cursor.option) {
731 case nir_cursor_before_block:
732 assert(nir_cf_node_prev(&cursor.block->cf_node) == NULL ||
733 nir_cf_node_prev(&cursor.block->cf_node)->type != nir_cf_node_block);
734 if (exec_list_is_empty(&cursor.block->instr_list)) {
735 /* Empty block. After is as good as before. */
736 cursor.option = nir_cursor_after_block;
737 }
738 return cursor;
739
740 case nir_cursor_after_block:
741 return cursor;
742
743 case nir_cursor_before_instr: {
744 nir_instr *prev_instr = nir_instr_prev(cursor.instr);
745 if (prev_instr) {
746 /* Before this instruction is after the previous */
747 cursor.instr = prev_instr;
748 cursor.option = nir_cursor_after_instr;
749 } else {
750 /* No previous instruction. Switch to before block */
751 cursor.block = cursor.instr->block;
752 cursor.option = nir_cursor_before_block;
753 }
754 return reduce_cursor(cursor);
755 }
756
757 case nir_cursor_after_instr:
758 if (nir_instr_next(cursor.instr) == NULL) {
759 /* This is the last instruction, switch to after block */
760 cursor.option = nir_cursor_after_block;
761 cursor.block = cursor.instr->block;
762 }
763 return cursor;
764
765 default:
 766       unreachable("Invalid cursor option");
767 }
768 }
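/* Editorial note, not part of the upstream file: as a consequence of the
 * reduction above, a cursor placed before the first instruction of a block
 * reduces to "before the block" and a cursor placed after the last
 * instruction reduces to "after the block", so nir_cursors_equal() below can
 * compare cursors that were constructed differently but name the same
 * insertion point.
 */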
769
770 bool
771 nir_cursors_equal(nir_cursor a, nir_cursor b)
772 {
773 /* Reduced cursors should be unique */
774 a = reduce_cursor(a);
775 b = reduce_cursor(b);
776
777 return a.block == b.block && a.option == b.option;
778 }
779
780 static bool
781 add_use_cb(nir_src *src, void *state)
782 {
783 nir_instr *instr = state;
784
785 src->parent_instr = instr;
786 list_addtail(&src->use_link,
787 src->is_ssa ? &src->ssa->uses : &src->reg.reg->uses);
788
789 return true;
790 }
791
792 static bool
793 add_ssa_def_cb(nir_ssa_def *def, void *state)
794 {
795 nir_instr *instr = state;
796
797 if (instr->block && def->index == UINT_MAX) {
798 nir_function_impl *impl =
799 nir_cf_node_get_function(&instr->block->cf_node);
800
801 def->index = impl->ssa_alloc++;
802 }
803
804 return true;
805 }
806
807 static bool
808 add_reg_def_cb(nir_dest *dest, void *state)
809 {
810 nir_instr *instr = state;
811
812 if (!dest->is_ssa) {
813 dest->reg.parent_instr = instr;
814 list_addtail(&dest->reg.def_link, &dest->reg.reg->defs);
815 }
816
817 return true;
818 }
819
820 static void
821 add_defs_uses(nir_instr *instr)
822 {
823 nir_foreach_src(instr, add_use_cb, instr);
824 nir_foreach_dest(instr, add_reg_def_cb, instr);
825 nir_foreach_ssa_def(instr, add_ssa_def_cb, instr);
826 }
827
828 void
829 nir_instr_insert(nir_cursor cursor, nir_instr *instr)
830 {
831 switch (cursor.option) {
832 case nir_cursor_before_block:
833 /* Only allow inserting jumps into empty blocks. */
834 if (instr->type == nir_instr_type_jump)
835 assert(exec_list_is_empty(&cursor.block->instr_list));
836
837 instr->block = cursor.block;
838 add_defs_uses(instr);
839 exec_list_push_head(&cursor.block->instr_list, &instr->node);
840 break;
841 case nir_cursor_after_block: {
842 /* Inserting instructions after a jump is illegal. */
843 nir_instr *last = nir_block_last_instr(cursor.block);
844 assert(last == NULL || last->type != nir_instr_type_jump);
845 (void) last;
846
847 instr->block = cursor.block;
848 add_defs_uses(instr);
849 exec_list_push_tail(&cursor.block->instr_list, &instr->node);
850 break;
851 }
852 case nir_cursor_before_instr:
853 assert(instr->type != nir_instr_type_jump);
854 instr->block = cursor.instr->block;
855 add_defs_uses(instr);
856 exec_node_insert_node_before(&cursor.instr->node, &instr->node);
857 break;
858 case nir_cursor_after_instr:
859 /* Inserting instructions after a jump is illegal. */
860 assert(cursor.instr->type != nir_instr_type_jump);
861
862 /* Only allow inserting jumps at the end of the block. */
863 if (instr->type == nir_instr_type_jump)
864 assert(cursor.instr == nir_block_last_instr(cursor.instr->block));
865
866 instr->block = cursor.instr->block;
867 add_defs_uses(instr);
868 exec_node_insert_after(&cursor.instr->node, &instr->node);
869 break;
870 }
871
872 if (instr->type == nir_instr_type_jump)
873 nir_handle_add_jump(instr->block);
874 }
875
876 static bool
877 src_is_valid(const nir_src *src)
878 {
879 return src->is_ssa ? (src->ssa != NULL) : (src->reg.reg != NULL);
880 }
881
882 static bool
883 remove_use_cb(nir_src *src, void *state)
884 {
885 (void) state;
886
887 if (src_is_valid(src))
888 list_del(&src->use_link);
889
890 return true;
891 }
892
893 static bool
894 remove_def_cb(nir_dest *dest, void *state)
895 {
896 (void) state;
897
898 if (!dest->is_ssa)
899 list_del(&dest->reg.def_link);
900
901 return true;
902 }
903
904 static void
905 remove_defs_uses(nir_instr *instr)
906 {
907 nir_foreach_dest(instr, remove_def_cb, instr);
908 nir_foreach_src(instr, remove_use_cb, instr);
909 }
910
911 void nir_instr_remove_v(nir_instr *instr)
912 {
913 remove_defs_uses(instr);
914 exec_node_remove(&instr->node);
915
916 if (instr->type == nir_instr_type_jump) {
917 nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
918 nir_handle_remove_jump(instr->block, jump_instr->type);
919 }
920 }
921
922 /*@}*/
923
924 void
925 nir_index_local_regs(nir_function_impl *impl)
926 {
927 unsigned index = 0;
928 foreach_list_typed(nir_register, reg, node, &impl->registers) {
929 reg->index = index++;
930 }
931 impl->reg_alloc = index;
932 }
933
934 void
935 nir_index_global_regs(nir_shader *shader)
936 {
937 unsigned index = 0;
938 foreach_list_typed(nir_register, reg, node, &shader->registers) {
939 reg->index = index++;
940 }
941 shader->reg_alloc = index;
942 }
943
944 static bool
945 visit_alu_dest(nir_alu_instr *instr, nir_foreach_dest_cb cb, void *state)
946 {
947 return cb(&instr->dest.dest, state);
948 }
949
950 static bool
951 visit_deref_dest(nir_deref_instr *instr, nir_foreach_dest_cb cb, void *state)
952 {
953 return cb(&instr->dest, state);
954 }
955
956 static bool
957 visit_intrinsic_dest(nir_intrinsic_instr *instr, nir_foreach_dest_cb cb,
958 void *state)
959 {
960 if (nir_intrinsic_infos[instr->intrinsic].has_dest)
961 return cb(&instr->dest, state);
962
963 return true;
964 }
965
966 static bool
967 visit_texture_dest(nir_tex_instr *instr, nir_foreach_dest_cb cb,
968 void *state)
969 {
970 return cb(&instr->dest, state);
971 }
972
973 static bool
974 visit_phi_dest(nir_phi_instr *instr, nir_foreach_dest_cb cb, void *state)
975 {
976 return cb(&instr->dest, state);
977 }
978
979 static bool
980 visit_parallel_copy_dest(nir_parallel_copy_instr *instr,
981 nir_foreach_dest_cb cb, void *state)
982 {
983 nir_foreach_parallel_copy_entry(entry, instr) {
984 if (!cb(&entry->dest, state))
985 return false;
986 }
987
988 return true;
989 }
990
991 bool
992 nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state)
993 {
994 switch (instr->type) {
995 case nir_instr_type_alu:
996 return visit_alu_dest(nir_instr_as_alu(instr), cb, state);
997 case nir_instr_type_deref:
998 return visit_deref_dest(nir_instr_as_deref(instr), cb, state);
999 case nir_instr_type_intrinsic:
1000 return visit_intrinsic_dest(nir_instr_as_intrinsic(instr), cb, state);
1001 case nir_instr_type_tex:
1002 return visit_texture_dest(nir_instr_as_tex(instr), cb, state);
1003 case nir_instr_type_phi:
1004 return visit_phi_dest(nir_instr_as_phi(instr), cb, state);
1005 case nir_instr_type_parallel_copy:
1006 return visit_parallel_copy_dest(nir_instr_as_parallel_copy(instr),
1007 cb, state);
1008
1009 case nir_instr_type_load_const:
1010 case nir_instr_type_ssa_undef:
1011 case nir_instr_type_call:
1012 case nir_instr_type_jump:
1013 break;
1014
1015 default:
1016 unreachable("Invalid instruction type");
1017 break;
1018 }
1019
1020 return true;
1021 }
1022
1023 struct foreach_ssa_def_state {
1024 nir_foreach_ssa_def_cb cb;
1025 void *client_state;
1026 };
1027
1028 static inline bool
1029 nir_ssa_def_visitor(nir_dest *dest, void *void_state)
1030 {
1031 struct foreach_ssa_def_state *state = void_state;
1032
1033 if (dest->is_ssa)
1034 return state->cb(&dest->ssa, state->client_state);
1035 else
1036 return true;
1037 }
1038
1039 bool
1040 nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state)
1041 {
1042 switch (instr->type) {
1043 case nir_instr_type_alu:
1044 case nir_instr_type_deref:
1045 case nir_instr_type_tex:
1046 case nir_instr_type_intrinsic:
1047 case nir_instr_type_phi:
1048 case nir_instr_type_parallel_copy: {
1049 struct foreach_ssa_def_state foreach_state = {cb, state};
1050 return nir_foreach_dest(instr, nir_ssa_def_visitor, &foreach_state);
1051 }
1052
1053 case nir_instr_type_load_const:
1054 return cb(&nir_instr_as_load_const(instr)->def, state);
1055 case nir_instr_type_ssa_undef:
1056 return cb(&nir_instr_as_ssa_undef(instr)->def, state);
1057 case nir_instr_type_call:
1058 case nir_instr_type_jump:
1059 return true;
1060 default:
1061 unreachable("Invalid instruction type");
1062 }
1063 }
1064
1065 static bool
1066 visit_src(nir_src *src, nir_foreach_src_cb cb, void *state)
1067 {
1068 if (!cb(src, state))
1069 return false;
1070 if (!src->is_ssa && src->reg.indirect)
1071 return cb(src->reg.indirect, state);
1072 return true;
1073 }
1074
1075 static bool
1076 visit_alu_src(nir_alu_instr *instr, nir_foreach_src_cb cb, void *state)
1077 {
1078 for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
1079 if (!visit_src(&instr->src[i].src, cb, state))
1080 return false;
1081
1082 return true;
1083 }
1084
1085 static bool
1086 visit_deref_instr_src(nir_deref_instr *instr,
1087 nir_foreach_src_cb cb, void *state)
1088 {
1089 if (instr->deref_type != nir_deref_type_var) {
1090 if (!visit_src(&instr->parent, cb, state))
1091 return false;
1092 }
1093
1094 if (instr->deref_type == nir_deref_type_array ||
1095 instr->deref_type == nir_deref_type_ptr_as_array) {
1096 if (!visit_src(&instr->arr.index, cb, state))
1097 return false;
1098 }
1099
1100 return true;
1101 }
1102
1103 static bool
1104 visit_tex_src(nir_tex_instr *instr, nir_foreach_src_cb cb, void *state)
1105 {
1106 for (unsigned i = 0; i < instr->num_srcs; i++) {
1107 if (!visit_src(&instr->src[i].src, cb, state))
1108 return false;
1109 }
1110
1111 return true;
1112 }
1113
1114 static bool
1115 visit_intrinsic_src(nir_intrinsic_instr *instr, nir_foreach_src_cb cb,
1116 void *state)
1117 {
1118 unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
1119 for (unsigned i = 0; i < num_srcs; i++) {
1120 if (!visit_src(&instr->src[i], cb, state))
1121 return false;
1122 }
1123
1124 return true;
1125 }
1126
1127 static bool
1128 visit_call_src(nir_call_instr *instr, nir_foreach_src_cb cb, void *state)
1129 {
1130 for (unsigned i = 0; i < instr->num_params; i++) {
1131 if (!visit_src(&instr->params[i], cb, state))
1132 return false;
1133 }
1134
1135 return true;
1136 }
1137
1138 static bool
1139 visit_phi_src(nir_phi_instr *instr, nir_foreach_src_cb cb, void *state)
1140 {
1141 nir_foreach_phi_src(src, instr) {
1142 if (!visit_src(&src->src, cb, state))
1143 return false;
1144 }
1145
1146 return true;
1147 }
1148
1149 static bool
1150 visit_parallel_copy_src(nir_parallel_copy_instr *instr,
1151 nir_foreach_src_cb cb, void *state)
1152 {
1153 nir_foreach_parallel_copy_entry(entry, instr) {
1154 if (!visit_src(&entry->src, cb, state))
1155 return false;
1156 }
1157
1158 return true;
1159 }
1160
1161 typedef struct {
1162 void *state;
1163 nir_foreach_src_cb cb;
1164 } visit_dest_indirect_state;
1165
1166 static bool
1167 visit_dest_indirect(nir_dest *dest, void *_state)
1168 {
1169 visit_dest_indirect_state *state = (visit_dest_indirect_state *) _state;
1170
1171 if (!dest->is_ssa && dest->reg.indirect)
1172 return state->cb(dest->reg.indirect, state->state);
1173
1174 return true;
1175 }
1176
1177 bool
1178 nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state)
1179 {
1180 switch (instr->type) {
1181 case nir_instr_type_alu:
1182 if (!visit_alu_src(nir_instr_as_alu(instr), cb, state))
1183 return false;
1184 break;
1185 case nir_instr_type_deref:
1186 if (!visit_deref_instr_src(nir_instr_as_deref(instr), cb, state))
1187 return false;
1188 break;
1189 case nir_instr_type_intrinsic:
1190 if (!visit_intrinsic_src(nir_instr_as_intrinsic(instr), cb, state))
1191 return false;
1192 break;
1193 case nir_instr_type_tex:
1194 if (!visit_tex_src(nir_instr_as_tex(instr), cb, state))
1195 return false;
1196 break;
1197 case nir_instr_type_call:
1198 if (!visit_call_src(nir_instr_as_call(instr), cb, state))
1199 return false;
1200 break;
1201 case nir_instr_type_load_const:
1202 /* Constant load instructions have no regular sources */
1203 break;
1204 case nir_instr_type_phi:
1205 if (!visit_phi_src(nir_instr_as_phi(instr), cb, state))
1206 return false;
1207 break;
1208 case nir_instr_type_parallel_copy:
1209 if (!visit_parallel_copy_src(nir_instr_as_parallel_copy(instr),
1210 cb, state))
1211 return false;
1212 break;
1213 case nir_instr_type_jump:
1214 case nir_instr_type_ssa_undef:
1215 return true;
1216
1217 default:
1218 unreachable("Invalid instruction type");
1219 break;
1220 }
1221
1222 visit_dest_indirect_state dest_state;
1223 dest_state.state = state;
1224 dest_state.cb = cb;
1225 return nir_foreach_dest(instr, visit_dest_indirect, &dest_state);
1226 }
1227
1228 int64_t
1229 nir_src_comp_as_int(nir_src src, unsigned comp)
1230 {
1231 assert(nir_src_is_const(src));
1232 nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);
1233
1234 assert(comp < load->def.num_components);
1235 switch (load->def.bit_size) {
1236 /* int1_t uses 0/-1 convention */
1237 case 1: return -(int)load->value.b[comp];
1238 case 8: return load->value.i8[comp];
1239 case 16: return load->value.i16[comp];
1240 case 32: return load->value.i32[comp];
1241 case 64: return load->value.i64[comp];
1242 default:
1243 unreachable("Invalid bit size");
1244 }
1245 }
1246
1247 uint64_t
1248 nir_src_comp_as_uint(nir_src src, unsigned comp)
1249 {
1250 assert(nir_src_is_const(src));
1251 nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);
1252
1253 assert(comp < load->def.num_components);
1254 switch (load->def.bit_size) {
1255 case 1: return load->value.b[comp];
1256 case 8: return load->value.u8[comp];
1257 case 16: return load->value.u16[comp];
1258 case 32: return load->value.u32[comp];
1259 case 64: return load->value.u64[comp];
1260 default:
1261 unreachable("Invalid bit size");
1262 }
1263 }
1264
1265 bool
1266 nir_src_comp_as_bool(nir_src src, unsigned comp)
1267 {
1268 int64_t i = nir_src_comp_as_int(src, comp);
1269
1270 /* Booleans of any size use 0/-1 convention */
1271 assert(i == 0 || i == -1);
1272
1273 return i;
1274 }
1275
1276 double
1277 nir_src_comp_as_float(nir_src src, unsigned comp)
1278 {
1279 assert(nir_src_is_const(src));
1280 nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);
1281
1282 assert(comp < load->def.num_components);
1283 switch (load->def.bit_size) {
1284 case 16: return _mesa_half_to_float(load->value.u16[comp]);
1285 case 32: return load->value.f32[comp];
1286 case 64: return load->value.f64[comp];
1287 default:
1288 unreachable("Invalid bit size");
1289 }
1290 }
1291
1292 int64_t
1293 nir_src_as_int(nir_src src)
1294 {
1295 assert(nir_src_num_components(src) == 1);
1296 return nir_src_comp_as_int(src, 0);
1297 }
1298
1299 uint64_t
1300 nir_src_as_uint(nir_src src)
1301 {
1302 assert(nir_src_num_components(src) == 1);
1303 return nir_src_comp_as_uint(src, 0);
1304 }
1305
1306 bool
1307 nir_src_as_bool(nir_src src)
1308 {
1309 assert(nir_src_num_components(src) == 1);
1310 return nir_src_comp_as_bool(src, 0);
1311 }
1312
1313 double
1314 nir_src_as_float(nir_src src)
1315 {
1316 assert(nir_src_num_components(src) == 1);
1317 return nir_src_comp_as_float(src, 0);
1318 }
1319
1320 nir_const_value *
1321 nir_src_as_const_value(nir_src src)
1322 {
1323 if (!src.is_ssa)
1324 return NULL;
1325
1326 if (src.ssa->parent_instr->type != nir_instr_type_load_const)
1327 return NULL;
1328
1329 nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);
1330
1331 return &load->value;
1332 }
1333
1334 /**
1335  * Returns true if the source is known to be dynamically uniform. Otherwise it
1336  * returns false, which means the source may or may not be dynamically uniform
1337  * but this cannot be determined.
1338 */
1339 bool
1340 nir_src_is_dynamically_uniform(nir_src src)
1341 {
1342 if (!src.is_ssa)
1343 return false;
1344
1345 /* Constants are trivially dynamically uniform */
1346 if (src.ssa->parent_instr->type == nir_instr_type_load_const)
1347 return true;
1348
1349 /* As are uniform variables */
1350 if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) {
1351 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr);
1352
1353 if (intr->intrinsic == nir_intrinsic_load_uniform)
1354 return true;
1355 }
1356
1357 /* XXX: this could have many more tests, such as when a sampler function is
1358 * called with dynamically uniform arguments.
1359 */
1360 return false;
1361 }
1362
1363 static void
1364 src_remove_all_uses(nir_src *src)
1365 {
1366 for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
1367 if (!src_is_valid(src))
1368 continue;
1369
1370 list_del(&src->use_link);
1371 }
1372 }
1373
1374 static void
1375 src_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
1376 {
1377 for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
1378 if (!src_is_valid(src))
1379 continue;
1380
1381 if (parent_instr) {
1382 src->parent_instr = parent_instr;
1383 if (src->is_ssa)
1384 list_addtail(&src->use_link, &src->ssa->uses);
1385 else
1386 list_addtail(&src->use_link, &src->reg.reg->uses);
1387 } else {
1388 assert(parent_if);
1389 src->parent_if = parent_if;
1390 if (src->is_ssa)
1391 list_addtail(&src->use_link, &src->ssa->if_uses);
1392 else
1393 list_addtail(&src->use_link, &src->reg.reg->if_uses);
1394 }
1395 }
1396 }
1397
1398 void
1399 nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src)
1400 {
1401 assert(!src_is_valid(src) || src->parent_instr == instr);
1402
1403 src_remove_all_uses(src);
1404 *src = new_src;
1405 src_add_all_uses(src, instr, NULL);
1406 }
1407
1408 void
1409 nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src)
1410 {
1411 assert(!src_is_valid(dest) || dest->parent_instr == dest_instr);
1412
1413 src_remove_all_uses(dest);
1414 src_remove_all_uses(src);
1415 *dest = *src;
1416 *src = NIR_SRC_INIT;
1417 src_add_all_uses(dest, dest_instr, NULL);
1418 }
1419
1420 void
1421 nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src)
1422 {
1423 nir_src *src = &if_stmt->condition;
1424 assert(!src_is_valid(src) || src->parent_if == if_stmt);
1425
1426 src_remove_all_uses(src);
1427 *src = new_src;
1428 src_add_all_uses(src, NULL, if_stmt);
1429 }
1430
1431 void
1432 nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest)
1433 {
1434 if (dest->is_ssa) {
1435 /* We can only overwrite an SSA destination if it has no uses. */
1436 assert(list_empty(&dest->ssa.uses) && list_empty(&dest->ssa.if_uses));
1437 } else {
1438 list_del(&dest->reg.def_link);
1439 if (dest->reg.indirect)
1440 src_remove_all_uses(dest->reg.indirect);
1441 }
1442
1443 /* We can't re-write with an SSA def */
1444 assert(!new_dest.is_ssa);
1445
1446 nir_dest_copy(dest, &new_dest, instr);
1447
1448 dest->reg.parent_instr = instr;
1449 list_addtail(&dest->reg.def_link, &new_dest.reg.reg->defs);
1450
1451 if (dest->reg.indirect)
1452 src_add_all_uses(dest->reg.indirect, instr, NULL);
1453 }
1454
1455 /* note: does *not* take ownership of 'name' */
1456 void
1457 nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
1458 unsigned num_components,
1459 unsigned bit_size, const char *name)
1460 {
1461 def->name = ralloc_strdup(instr, name);
1462 def->parent_instr = instr;
1463 list_inithead(&def->uses);
1464 list_inithead(&def->if_uses);
1465 def->num_components = num_components;
1466 def->bit_size = bit_size;
1467
1468 if (instr->block) {
1469 nir_function_impl *impl =
1470 nir_cf_node_get_function(&instr->block->cf_node);
1471
1472 def->index = impl->ssa_alloc++;
1473 } else {
1474 def->index = UINT_MAX;
1475 }
1476 }
1477
1478 /* note: does *not* take ownership of 'name' */
1479 void
1480 nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
1481 unsigned num_components, unsigned bit_size,
1482 const char *name)
1483 {
1484 dest->is_ssa = true;
1485 nir_ssa_def_init(instr, &dest->ssa, num_components, bit_size, name);
1486 }
1487
1488 void
1489 nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src)
1490 {
1491 assert(!new_src.is_ssa || def != new_src.ssa);
1492
1493 nir_foreach_use_safe(use_src, def)
1494 nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
1495
1496 nir_foreach_if_use_safe(use_src, def)
1497 nir_if_rewrite_condition(use_src->parent_if, new_src);
1498 }
1499
1500 static bool
1501 is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between)
1502 {
1503 assert(start->block == end->block);
1504
1505 if (between->block != start->block)
1506 return false;
1507
1508 /* Search backwards looking for "between" */
1509 while (start != end) {
1510 if (between == end)
1511 return true;
1512
1513 end = nir_instr_prev(end);
1514 assert(end);
1515 }
1516
1517 return false;
1518 }
1519
1520 /* Replaces all uses of the given SSA def with the given source but only if
1521 * the use comes after the after_me instruction. This can be useful if you
1522 * are emitting code to fix up the result of some instruction: you can freely
1523 * use the result in that code and then call rewrite_uses_after and pass the
1524 * last fixup instruction as after_me and it will replace all of the uses you
1525 * want without touching the fixup code.
1526 *
1527 * This function assumes that after_me is in the same block as
1528 * def->parent_instr and that after_me comes after def->parent_instr.
1529 */
1530 void
1531 nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src,
1532 nir_instr *after_me)
1533 {
1534 if (new_src.is_ssa && def == new_src.ssa)
1535 return;
1536
1537 nir_foreach_use_safe(use_src, def) {
1538 assert(use_src->parent_instr != def->parent_instr);
1539 /* Since def already dominates all of its uses, the only way a use can
1540 * not be dominated by after_me is if it is between def and after_me in
1541 * the instruction list.
1542 */
1543 if (!is_instr_between(def->parent_instr, after_me, use_src->parent_instr))
1544 nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
1545 }
1546
1547 nir_foreach_if_use_safe(use_src, def)
1548 nir_if_rewrite_condition(use_src->parent_if, new_src);
1549 }
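/* Editorial sketch, not part of the upstream file, of the fixup pattern
 * described above.  Assuming a nir_builder 'b' and an ALU instruction 'alu'
 * whose result should be clamped after the fact:
 *
 *    b->cursor = nir_after_instr(&alu->instr);
 *    nir_ssa_def *clamped = nir_fsat(b, &alu->dest.dest.ssa);
 *    nir_ssa_def_rewrite_uses_after(&alu->dest.dest.ssa,
 *                                   nir_src_for_ssa(clamped),
 *                                   clamped->parent_instr);
 *
 * Every use of the original result is redirected to 'clamped' except the one
 * inside the fixup code itself.
 */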
1550
1551 nir_component_mask_t
1552 nir_ssa_def_components_read(const nir_ssa_def *def)
1553 {
1554 nir_component_mask_t read_mask = 0;
1555 nir_foreach_use(use, def) {
1556 if (use->parent_instr->type == nir_instr_type_alu) {
1557 nir_alu_instr *alu = nir_instr_as_alu(use->parent_instr);
1558 nir_alu_src *alu_src = exec_node_data(nir_alu_src, use, src);
1559 int src_idx = alu_src - &alu->src[0];
1560 assert(src_idx >= 0 && src_idx < nir_op_infos[alu->op].num_inputs);
1561 read_mask |= nir_alu_instr_src_read_mask(alu, src_idx);
1562 } else {
1563 return (1 << def->num_components) - 1;
1564 }
1565 }
1566
1567 if (!list_empty(&def->if_uses))
1568 read_mask |= 1;
1569
1570 return read_mask;
1571 }
1572
1573 nir_block *
1574 nir_block_cf_tree_next(nir_block *block)
1575 {
1576 if (block == NULL) {
1577 /* nir_foreach_block_safe() will call this function on a NULL block
1578 * after the last iteration, but it won't use the result so just return
1579 * NULL here.
1580 */
1581 return NULL;
1582 }
1583
1584 nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
1585 if (cf_next)
1586 return nir_cf_node_cf_tree_first(cf_next);
1587
1588 nir_cf_node *parent = block->cf_node.parent;
1589
1590 switch (parent->type) {
1591 case nir_cf_node_if: {
1592 /* Are we at the end of the if? Go to the beginning of the else */
1593 nir_if *if_stmt = nir_cf_node_as_if(parent);
1594 if (block == nir_if_last_then_block(if_stmt))
1595 return nir_if_first_else_block(if_stmt);
1596
1597 assert(block == nir_if_last_else_block(if_stmt));
1598 /* fall through */
1599 }
1600
1601 case nir_cf_node_loop:
1602 return nir_cf_node_as_block(nir_cf_node_next(parent));
1603
1604 case nir_cf_node_function:
1605 return NULL;
1606
1607 default:
1608 unreachable("unknown cf node type");
1609 }
1610 }
1611
1612 nir_block *
1613 nir_block_cf_tree_prev(nir_block *block)
1614 {
1615 if (block == NULL) {
1616 /* do this for consistency with nir_block_cf_tree_next() */
1617 return NULL;
1618 }
1619
1620 nir_cf_node *cf_prev = nir_cf_node_prev(&block->cf_node);
1621 if (cf_prev)
1622 return nir_cf_node_cf_tree_last(cf_prev);
1623
1624 nir_cf_node *parent = block->cf_node.parent;
1625
1626 switch (parent->type) {
1627 case nir_cf_node_if: {
1628 /* Are we at the beginning of the else? Go to the end of the if */
1629 nir_if *if_stmt = nir_cf_node_as_if(parent);
1630 if (block == nir_if_first_else_block(if_stmt))
1631 return nir_if_last_then_block(if_stmt);
1632
1633 assert(block == nir_if_first_then_block(if_stmt));
1634 /* fall through */
1635 }
1636
1637 case nir_cf_node_loop:
1638 return nir_cf_node_as_block(nir_cf_node_prev(parent));
1639
1640 case nir_cf_node_function:
1641 return NULL;
1642
1643 default:
1644 unreachable("unknown cf node type");
1645 }
1646 }
1647
1648 nir_block *nir_cf_node_cf_tree_first(nir_cf_node *node)
1649 {
1650 switch (node->type) {
1651 case nir_cf_node_function: {
1652 nir_function_impl *impl = nir_cf_node_as_function(node);
1653 return nir_start_block(impl);
1654 }
1655
1656 case nir_cf_node_if: {
1657 nir_if *if_stmt = nir_cf_node_as_if(node);
1658 return nir_if_first_then_block(if_stmt);
1659 }
1660
1661 case nir_cf_node_loop: {
1662 nir_loop *loop = nir_cf_node_as_loop(node);
1663 return nir_loop_first_block(loop);
1664 }
1665
1666 case nir_cf_node_block: {
1667 return nir_cf_node_as_block(node);
1668 }
1669
1670 default:
1671 unreachable("unknown node type");
1672 }
1673 }
1674
1675 nir_block *nir_cf_node_cf_tree_last(nir_cf_node *node)
1676 {
1677 switch (node->type) {
1678 case nir_cf_node_function: {
1679 nir_function_impl *impl = nir_cf_node_as_function(node);
1680 return nir_impl_last_block(impl);
1681 }
1682
1683 case nir_cf_node_if: {
1684 nir_if *if_stmt = nir_cf_node_as_if(node);
1685 return nir_if_last_else_block(if_stmt);
1686 }
1687
1688 case nir_cf_node_loop: {
1689 nir_loop *loop = nir_cf_node_as_loop(node);
1690 return nir_loop_last_block(loop);
1691 }
1692
1693 case nir_cf_node_block: {
1694 return nir_cf_node_as_block(node);
1695 }
1696
1697 default:
1698 unreachable("unknown node type");
1699 }
1700 }
1701
1702 nir_block *nir_cf_node_cf_tree_next(nir_cf_node *node)
1703 {
1704 if (node->type == nir_cf_node_block)
1705 return nir_block_cf_tree_next(nir_cf_node_as_block(node));
1706 else if (node->type == nir_cf_node_function)
1707 return NULL;
1708 else
1709 return nir_cf_node_as_block(nir_cf_node_next(node));
1710 }
1711
1712 nir_if *
1713 nir_block_get_following_if(nir_block *block)
1714 {
1715 if (exec_node_is_tail_sentinel(&block->cf_node.node))
1716 return NULL;
1717
1718 if (nir_cf_node_is_last(&block->cf_node))
1719 return NULL;
1720
1721 nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
1722
1723 if (next_node->type != nir_cf_node_if)
1724 return NULL;
1725
1726 return nir_cf_node_as_if(next_node);
1727 }
1728
1729 nir_loop *
1730 nir_block_get_following_loop(nir_block *block)
1731 {
1732 if (exec_node_is_tail_sentinel(&block->cf_node.node))
1733 return NULL;
1734
1735 if (nir_cf_node_is_last(&block->cf_node))
1736 return NULL;
1737
1738 nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
1739
1740 if (next_node->type != nir_cf_node_loop)
1741 return NULL;
1742
1743 return nir_cf_node_as_loop(next_node);
1744 }
1745
1746 void
1747 nir_index_blocks(nir_function_impl *impl)
1748 {
1749 unsigned index = 0;
1750
1751 if (impl->valid_metadata & nir_metadata_block_index)
1752 return;
1753
1754 nir_foreach_block(block, impl) {
1755 block->index = index++;
1756 }
1757
1758 /* The end_block isn't really part of the program, which is why its index
1759 * is >= num_blocks.
1760 */
1761 impl->num_blocks = impl->end_block->index = index;
1762 }
1763
1764 static bool
1765 index_ssa_def_cb(nir_ssa_def *def, void *state)
1766 {
1767 unsigned *index = (unsigned *) state;
1768 def->index = (*index)++;
1769
1770 return true;
1771 }
1772
1773 /**
1774 * The indices are applied top-to-bottom which has the very nice property
1775 * that, if A dominates B, then A->index <= B->index.
1776 */
1777 void
1778 nir_index_ssa_defs(nir_function_impl *impl)
1779 {
1780 unsigned index = 0;
1781
1782 nir_foreach_block(block, impl) {
1783 nir_foreach_instr(instr, block)
1784 nir_foreach_ssa_def(instr, index_ssa_def_cb, &index);
1785 }
1786
1787 impl->ssa_alloc = index;
1788 }
1789
1790 /**
1791 * The indices are applied top-to-bottom which has the very nice property
1792 * that, if A dominates B, then A->index <= B->index.
1793 */
1794 unsigned
1795 nir_index_instrs(nir_function_impl *impl)
1796 {
1797 unsigned index = 0;
1798
1799 nir_foreach_block(block, impl) {
1800 nir_foreach_instr(instr, block)
1801 instr->index = index++;
1802 }
1803
1804 return index;
1805 }
1806
1807 nir_intrinsic_op
1808 nir_intrinsic_from_system_value(gl_system_value val)
1809 {
1810 switch (val) {
1811 case SYSTEM_VALUE_VERTEX_ID:
1812 return nir_intrinsic_load_vertex_id;
1813 case SYSTEM_VALUE_INSTANCE_ID:
1814 return nir_intrinsic_load_instance_id;
1815 case SYSTEM_VALUE_DRAW_ID:
1816 return nir_intrinsic_load_draw_id;
1817 case SYSTEM_VALUE_BASE_INSTANCE:
1818 return nir_intrinsic_load_base_instance;
1819 case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
1820 return nir_intrinsic_load_vertex_id_zero_base;
1821 case SYSTEM_VALUE_IS_INDEXED_DRAW:
1822 return nir_intrinsic_load_is_indexed_draw;
1823 case SYSTEM_VALUE_FIRST_VERTEX:
1824 return nir_intrinsic_load_first_vertex;
1825 case SYSTEM_VALUE_BASE_VERTEX:
1826 return nir_intrinsic_load_base_vertex;
1827 case SYSTEM_VALUE_INVOCATION_ID:
1828 return nir_intrinsic_load_invocation_id;
1829 case SYSTEM_VALUE_FRAG_COORD:
1830 return nir_intrinsic_load_frag_coord;
1831 case SYSTEM_VALUE_FRONT_FACE:
1832 return nir_intrinsic_load_front_face;
1833 case SYSTEM_VALUE_SAMPLE_ID:
1834 return nir_intrinsic_load_sample_id;
1835 case SYSTEM_VALUE_SAMPLE_POS:
1836 return nir_intrinsic_load_sample_pos;
1837 case SYSTEM_VALUE_SAMPLE_MASK_IN:
1838 return nir_intrinsic_load_sample_mask_in;
1839 case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
1840 return nir_intrinsic_load_local_invocation_id;
1841 case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
1842 return nir_intrinsic_load_local_invocation_index;
1843 case SYSTEM_VALUE_WORK_GROUP_ID:
1844 return nir_intrinsic_load_work_group_id;
1845 case SYSTEM_VALUE_NUM_WORK_GROUPS:
1846 return nir_intrinsic_load_num_work_groups;
1847 case SYSTEM_VALUE_PRIMITIVE_ID:
1848 return nir_intrinsic_load_primitive_id;
1849 case SYSTEM_VALUE_TESS_COORD:
1850 return nir_intrinsic_load_tess_coord;
1851 case SYSTEM_VALUE_TESS_LEVEL_OUTER:
1852 return nir_intrinsic_load_tess_level_outer;
1853 case SYSTEM_VALUE_TESS_LEVEL_INNER:
1854 return nir_intrinsic_load_tess_level_inner;
1855 case SYSTEM_VALUE_VERTICES_IN:
1856 return nir_intrinsic_load_patch_vertices_in;
1857 case SYSTEM_VALUE_HELPER_INVOCATION:
1858 return nir_intrinsic_load_helper_invocation;
1859 case SYSTEM_VALUE_VIEW_INDEX:
1860 return nir_intrinsic_load_view_index;
1861 case SYSTEM_VALUE_SUBGROUP_SIZE:
1862 return nir_intrinsic_load_subgroup_size;
1863 case SYSTEM_VALUE_SUBGROUP_INVOCATION:
1864 return nir_intrinsic_load_subgroup_invocation;
1865 case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
1866 return nir_intrinsic_load_subgroup_eq_mask;
1867 case SYSTEM_VALUE_SUBGROUP_GE_MASK:
1868 return nir_intrinsic_load_subgroup_ge_mask;
1869 case SYSTEM_VALUE_SUBGROUP_GT_MASK:
1870 return nir_intrinsic_load_subgroup_gt_mask;
1871 case SYSTEM_VALUE_SUBGROUP_LE_MASK:
1872 return nir_intrinsic_load_subgroup_le_mask;
1873 case SYSTEM_VALUE_SUBGROUP_LT_MASK:
1874 return nir_intrinsic_load_subgroup_lt_mask;
1875 case SYSTEM_VALUE_NUM_SUBGROUPS:
1876 return nir_intrinsic_load_num_subgroups;
1877 case SYSTEM_VALUE_SUBGROUP_ID:
1878 return nir_intrinsic_load_subgroup_id;
1879 case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
1880 return nir_intrinsic_load_local_group_size;
1881 case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
1882 return nir_intrinsic_load_global_invocation_id;
1883 case SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX:
1884 return nir_intrinsic_load_global_invocation_index;
1885 case SYSTEM_VALUE_WORK_DIM:
1886 return nir_intrinsic_load_work_dim;
1887 default:
1888 unreachable("system value does not directly correspond to intrinsic");
1889 }
1890 }
1891
1892 gl_system_value
1893 nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
1894 {
1895 switch (intrin) {
1896 case nir_intrinsic_load_vertex_id:
1897 return SYSTEM_VALUE_VERTEX_ID;
1898 case nir_intrinsic_load_instance_id:
1899 return SYSTEM_VALUE_INSTANCE_ID;
1900 case nir_intrinsic_load_draw_id:
1901 return SYSTEM_VALUE_DRAW_ID;
1902 case nir_intrinsic_load_base_instance:
1903 return SYSTEM_VALUE_BASE_INSTANCE;
1904 case nir_intrinsic_load_vertex_id_zero_base:
1905 return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
1906 case nir_intrinsic_load_first_vertex:
1907 return SYSTEM_VALUE_FIRST_VERTEX;
1908 case nir_intrinsic_load_is_indexed_draw:
1909 return SYSTEM_VALUE_IS_INDEXED_DRAW;
1910 case nir_intrinsic_load_base_vertex:
1911 return SYSTEM_VALUE_BASE_VERTEX;
1912 case nir_intrinsic_load_invocation_id:
1913 return SYSTEM_VALUE_INVOCATION_ID;
1914 case nir_intrinsic_load_frag_coord:
1915 return SYSTEM_VALUE_FRAG_COORD;
1916 case nir_intrinsic_load_front_face:
1917 return SYSTEM_VALUE_FRONT_FACE;
1918 case nir_intrinsic_load_sample_id:
1919 return SYSTEM_VALUE_SAMPLE_ID;
1920 case nir_intrinsic_load_sample_pos:
1921 return SYSTEM_VALUE_SAMPLE_POS;
1922 case nir_intrinsic_load_sample_mask_in:
1923 return SYSTEM_VALUE_SAMPLE_MASK_IN;
1924 case nir_intrinsic_load_local_invocation_id:
1925 return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
1926 case nir_intrinsic_load_local_invocation_index:
1927 return SYSTEM_VALUE_LOCAL_INVOCATION_INDEX;
1928 case nir_intrinsic_load_num_work_groups:
1929 return SYSTEM_VALUE_NUM_WORK_GROUPS;
1930 case nir_intrinsic_load_work_group_id:
1931 return SYSTEM_VALUE_WORK_GROUP_ID;
1932 case nir_intrinsic_load_primitive_id:
1933 return SYSTEM_VALUE_PRIMITIVE_ID;
1934 case nir_intrinsic_load_tess_coord:
1935 return SYSTEM_VALUE_TESS_COORD;
1936 case nir_intrinsic_load_tess_level_outer:
1937 return SYSTEM_VALUE_TESS_LEVEL_OUTER;
1938 case nir_intrinsic_load_tess_level_inner:
1939 return SYSTEM_VALUE_TESS_LEVEL_INNER;
1940 case nir_intrinsic_load_patch_vertices_in:
1941 return SYSTEM_VALUE_VERTICES_IN;
1942 case nir_intrinsic_load_helper_invocation:
1943 return SYSTEM_VALUE_HELPER_INVOCATION;
1944 case nir_intrinsic_load_view_index:
1945 return SYSTEM_VALUE_VIEW_INDEX;
1946 case nir_intrinsic_load_subgroup_size:
1947 return SYSTEM_VALUE_SUBGROUP_SIZE;
1948 case nir_intrinsic_load_subgroup_invocation:
1949 return SYSTEM_VALUE_SUBGROUP_INVOCATION;
1950 case nir_intrinsic_load_subgroup_eq_mask:
1951 return SYSTEM_VALUE_SUBGROUP_EQ_MASK;
1952 case nir_intrinsic_load_subgroup_ge_mask:
1953 return SYSTEM_VALUE_SUBGROUP_GE_MASK;
1954 case nir_intrinsic_load_subgroup_gt_mask:
1955 return SYSTEM_VALUE_SUBGROUP_GT_MASK;
1956 case nir_intrinsic_load_subgroup_le_mask:
1957 return SYSTEM_VALUE_SUBGROUP_LE_MASK;
1958 case nir_intrinsic_load_subgroup_lt_mask:
1959 return SYSTEM_VALUE_SUBGROUP_LT_MASK;
1960 case nir_intrinsic_load_num_subgroups:
1961 return SYSTEM_VALUE_NUM_SUBGROUPS;
1962 case nir_intrinsic_load_subgroup_id:
1963 return SYSTEM_VALUE_SUBGROUP_ID;
1964 case nir_intrinsic_load_local_group_size:
1965 return SYSTEM_VALUE_LOCAL_GROUP_SIZE;
1966 case nir_intrinsic_load_global_invocation_id:
1967 return SYSTEM_VALUE_GLOBAL_INVOCATION_ID;
1968 default:
1969 unreachable("intrinsic doesn't produce a system value");
1970 }
1971 }
1972
1973 /* OpenGL utility method that remaps attribute locations if they are
1974  * doubles. Not needed for Vulkan due to the difference in input location
1975  * counts for doubles between Vulkan and OpenGL.
1976 *
1977 * The bitfield returned in dual_slot is one bit for each double input slot in
1978 * the original OpenGL single-slot input numbering. The mapping from old
1979 * locations to new locations is as follows:
1980 *
1981 * new_loc = loc + util_bitcount(dual_slot & BITFIELD64_MASK(loc))
1982 */
1983 void
1984 nir_remap_dual_slot_attributes(nir_shader *shader, uint64_t *dual_slot)
1985 {
1986 assert(shader->info.stage == MESA_SHADER_VERTEX);
1987
1988 *dual_slot = 0;
1989 nir_foreach_variable(var, &shader->inputs) {
1990 if (glsl_type_is_dual_slot(glsl_without_array(var->type))) {
1991 unsigned slots = glsl_count_attribute_slots(var->type, true);
1992 *dual_slot |= BITFIELD64_MASK(slots) << var->data.location;
1993 }
1994 }
1995
1996 nir_foreach_variable(var, &shader->inputs) {
1997 var->data.location +=
1998 util_bitcount64(*dual_slot & BITFIELD64_MASK(var->data.location));
1999 }
2000 }
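/* Worked example (editorial, using only the formula documented above): if a
 * double-precision attribute sits at location 0, dual_slot has bit 0 set, and
 * an attribute originally at location 1 is moved to
 *
 *    1 + util_bitcount64(0x1 & BITFIELD64_MASK(1)) = 1 + 1 = 2
 *
 * which leaves location 1 free for the second slot of the double.
 */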
2001
2002 /* Returns an attribute mask that has been re-compacted using the given
2003 * dual_slot mask.
2004 */
2005 uint64_t
2006 nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot)
2007 {
2008 while (dual_slot) {
2009 unsigned loc = u_bit_scan64(&dual_slot);
2010 /* mask of all bits up to and including loc */
2011 uint64_t mask = BITFIELD64_MASK(loc + 1);
2012 attribs = (attribs & mask) | ((attribs & ~mask) >> 1);
2013 }
2014 return attribs;
2015 }
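/* Worked example (editorial): with dual_slot = 0x1 and a single attribute
 * read at remapped location 2 (attribs = 0x4), the loop runs once with
 * loc = 0:
 *
 *    mask    = BITFIELD64_MASK(1) = 0x1
 *    attribs = (0x4 & 0x1) | ((0x4 & ~0x1) >> 1) = 0x2
 *
 * i.e. the attribute is reported back at its original single-slot location 1.
 */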