nir: Just return when asked to rewrite uses of an SSA def to itself.
[mesa.git] src/compiler/nir/nir.c
/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *
 */

#include "nir.h"
#include "nir_control_flow_private.h"
#include "util/half_float.h"
#include <limits.h>
#include <assert.h>
#include <math.h>
#include "util/u_math.h"

#include "main/menums.h" /* BITFIELD64_MASK */

nir_shader *
nir_shader_create(void *mem_ctx,
                  gl_shader_stage stage,
                  const nir_shader_compiler_options *options,
                  shader_info *si)
{
   nir_shader *shader = rzalloc(mem_ctx, nir_shader);

   exec_list_make_empty(&shader->uniforms);
   exec_list_make_empty(&shader->inputs);
   exec_list_make_empty(&shader->outputs);
   exec_list_make_empty(&shader->shared);

   shader->options = options;

   if (si) {
      assert(si->stage == stage);
      shader->info = *si;
   } else {
      shader->info.stage = stage;
   }

   exec_list_make_empty(&shader->functions);
   exec_list_make_empty(&shader->registers);
   exec_list_make_empty(&shader->globals);
   exec_list_make_empty(&shader->system_values);
   shader->reg_alloc = 0;

   shader->num_inputs = 0;
   shader->num_outputs = 0;
   shader->num_uniforms = 0;
   shader->num_shared = 0;

   return shader;
}

static nir_register *
reg_create(void *mem_ctx, struct exec_list *list)
{
   nir_register *reg = ralloc(mem_ctx, nir_register);

   list_inithead(&reg->uses);
   list_inithead(&reg->defs);
   list_inithead(&reg->if_uses);

   reg->num_components = 0;
   reg->bit_size = 32;
   reg->num_array_elems = 0;
   reg->is_packed = false;
   reg->name = NULL;

   exec_list_push_tail(list, &reg->node);

   return reg;
}

nir_register *
nir_global_reg_create(nir_shader *shader)
{
   nir_register *reg = reg_create(shader, &shader->registers);
   reg->index = shader->reg_alloc++;
   reg->is_global = true;

   return reg;
}

nir_register *
nir_local_reg_create(nir_function_impl *impl)
{
   nir_register *reg = reg_create(ralloc_parent(impl), &impl->registers);
   reg->index = impl->reg_alloc++;
   reg->is_global = false;

   return reg;
}

void
nir_reg_remove(nir_register *reg)
{
   exec_node_remove(&reg->node);
}

void
nir_shader_add_variable(nir_shader *shader, nir_variable *var)
{
   switch (var->data.mode) {
   case nir_var_all:
      assert(!"invalid mode");
      break;

   case nir_var_function_temp:
      assert(!"nir_shader_add_variable cannot be used for local variables");
      break;

   case nir_var_shader_temp:
      exec_list_push_tail(&shader->globals, &var->node);
      break;

   case nir_var_shader_in:
      exec_list_push_tail(&shader->inputs, &var->node);
      break;

   case nir_var_shader_out:
      exec_list_push_tail(&shader->outputs, &var->node);
      break;

   case nir_var_uniform:
   case nir_var_mem_ubo:
   case nir_var_mem_ssbo:
      exec_list_push_tail(&shader->uniforms, &var->node);
      break;

   case nir_var_mem_shared:
      assert(gl_shader_stage_is_compute(shader->info.stage));
      exec_list_push_tail(&shader->shared, &var->node);
      break;

   case nir_var_mem_global:
      assert(!"nir_shader_add_variable cannot be used for global memory");
      break;

   case nir_var_system_value:
      exec_list_push_tail(&shader->system_values, &var->node);
      break;
   }
}

nir_variable *
nir_variable_create(nir_shader *shader, nir_variable_mode mode,
                    const struct glsl_type *type, const char *name)
{
   nir_variable *var = rzalloc(shader, nir_variable);
   var->name = ralloc_strdup(var, name);
   var->type = type;
   var->data.mode = mode;
   var->data.how_declared = nir_var_declared_normally;

   if ((mode == nir_var_shader_in &&
        shader->info.stage != MESA_SHADER_VERTEX) ||
       (mode == nir_var_shader_out &&
        shader->info.stage != MESA_SHADER_FRAGMENT))
      var->data.interpolation = INTERP_MODE_SMOOTH;

   if (mode == nir_var_shader_in || mode == nir_var_uniform)
      var->data.read_only = true;

   nir_shader_add_variable(shader, var);

   return var;
}

nir_variable *
nir_local_variable_create(nir_function_impl *impl,
                          const struct glsl_type *type, const char *name)
{
   nir_variable *var = rzalloc(impl->function->shader, nir_variable);
   var->name = ralloc_strdup(var, name);
   var->type = type;
   var->data.mode = nir_var_function_temp;

   nir_function_impl_add_variable(impl, var);

   return var;
}

nir_function *
nir_function_create(nir_shader *shader, const char *name)
{
   nir_function *func = ralloc(shader, nir_function);

   exec_list_push_tail(&shader->functions, &func->node);

   func->name = ralloc_strdup(func, name);
   func->shader = shader;
   func->num_params = 0;
   func->params = NULL;
   func->impl = NULL;
   func->is_entrypoint = false;

   return func;
}

/* NOTE: if the instruction you are copying a src to is already added
 * to the IR, use nir_instr_rewrite_src() instead.
 */
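/* A minimal usage sketch (illustrative, not part of this file): copying a
 * source into a freshly created instruction that has not been inserted into
 * the IR yet, assuming "shader" and "existing_src" are in scope:
 *
 *    nir_alu_instr *add = nir_alu_instr_create(shader, nir_op_fadd);
 *    nir_src_copy(&add->src[0].src, &existing_src, add);
 *
 * Once the instruction has been inserted with nir_instr_insert(), its
 * sources must instead be changed with nir_instr_rewrite_src() so that the
 * use/def lists stay consistent.
 */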
void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx)
{
   dest->is_ssa = src->is_ssa;
   if (src->is_ssa) {
      dest->ssa = src->ssa;
   } else {
      dest->reg.base_offset = src->reg.base_offset;
      dest->reg.reg = src->reg.reg;
      if (src->reg.indirect) {
         dest->reg.indirect = ralloc(mem_ctx, nir_src);
         nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx);
      } else {
         dest->reg.indirect = NULL;
      }
   }
}

void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr)
{
   /* Copying an SSA definition makes no sense whatsoever. */
   assert(!src->is_ssa);

   dest->is_ssa = false;

   dest->reg.base_offset = src->reg.base_offset;
   dest->reg.reg = src->reg.reg;
   if (src->reg.indirect) {
      dest->reg.indirect = ralloc(instr, nir_src);
      nir_src_copy(dest->reg.indirect, src->reg.indirect, instr);
   } else {
      dest->reg.indirect = NULL;
   }
}

void
nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
                 nir_alu_instr *instr)
{
   nir_src_copy(&dest->src, &src->src, &instr->instr);
   dest->abs = src->abs;
   dest->negate = src->negate;
   for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
      dest->swizzle[i] = src->swizzle[i];
}

void
nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
                  nir_alu_instr *instr)
{
   nir_dest_copy(&dest->dest, &src->dest, &instr->instr);
   dest->write_mask = src->write_mask;
   dest->saturate = src->saturate;
}


static void
cf_init(nir_cf_node *node, nir_cf_node_type type)
{
   exec_node_init(&node->node);
   node->parent = NULL;
   node->type = type;
}

nir_function_impl *
nir_function_impl_create_bare(nir_shader *shader)
{
   nir_function_impl *impl = ralloc(shader, nir_function_impl);

   impl->function = NULL;

   cf_init(&impl->cf_node, nir_cf_node_function);

   exec_list_make_empty(&impl->body);
   exec_list_make_empty(&impl->registers);
   exec_list_make_empty(&impl->locals);
   impl->reg_alloc = 0;
   impl->ssa_alloc = 0;
   impl->valid_metadata = nir_metadata_none;

   /* create start & end blocks */
   nir_block *start_block = nir_block_create(shader);
   nir_block *end_block = nir_block_create(shader);
   start_block->cf_node.parent = &impl->cf_node;
   end_block->cf_node.parent = &impl->cf_node;
   impl->end_block = end_block;

   exec_list_push_tail(&impl->body, &start_block->cf_node.node);

   start_block->successors[0] = end_block;
   _mesa_set_add(end_block->predecessors, start_block);
   return impl;
}

nir_function_impl *
nir_function_impl_create(nir_function *function)
{
   assert(function->impl == NULL);

   nir_function_impl *impl = nir_function_impl_create_bare(function->shader);

   function->impl = impl;
   impl->function = function;

   return impl;
}

nir_block *
nir_block_create(nir_shader *shader)
{
   nir_block *block = rzalloc(shader, nir_block);

   cf_init(&block->cf_node, nir_cf_node_block);

   block->successors[0] = block->successors[1] = NULL;
   block->predecessors = _mesa_pointer_set_create(block);
   block->imm_dom = NULL;
   /* XXX maybe it would be worth it to defer allocation?  This
    * way it doesn't get allocated for shader refs that never run
    * nir_calc_dominance?  For example, the state tracker creates an
    * initial IR, clones that, runs the appropriate lowering passes, and
    * passes it to the driver, which does common lowering/opt and then
    * stores a ref which is later used to do state-specific lowering and
    * further opt.  Do any of the references not need dominance metadata?
    */
   block->dom_frontier = _mesa_pointer_set_create(block);

   exec_list_make_empty(&block->instr_list);

   return block;
}

static inline void
src_init(nir_src *src)
{
   src->is_ssa = false;
   src->reg.reg = NULL;
   src->reg.indirect = NULL;
   src->reg.base_offset = 0;
}

nir_if *
nir_if_create(nir_shader *shader)
{
   nir_if *if_stmt = ralloc(shader, nir_if);

   cf_init(&if_stmt->cf_node, nir_cf_node_if);
   src_init(&if_stmt->condition);

   nir_block *then = nir_block_create(shader);
   exec_list_make_empty(&if_stmt->then_list);
   exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node);
   then->cf_node.parent = &if_stmt->cf_node;

   nir_block *else_stmt = nir_block_create(shader);
   exec_list_make_empty(&if_stmt->else_list);
   exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node);
   else_stmt->cf_node.parent = &if_stmt->cf_node;

   return if_stmt;
}

nir_loop *
nir_loop_create(nir_shader *shader)
{
   nir_loop *loop = rzalloc(shader, nir_loop);

   cf_init(&loop->cf_node, nir_cf_node_loop);

   nir_block *body = nir_block_create(shader);
   exec_list_make_empty(&loop->body);
   exec_list_push_tail(&loop->body, &body->cf_node.node);
   body->cf_node.parent = &loop->cf_node;

   body->successors[0] = body;
   _mesa_set_add(body->predecessors, body);

   return loop;
}

static void
instr_init(nir_instr *instr, nir_instr_type type)
{
   instr->type = type;
   instr->block = NULL;
   exec_node_init(&instr->node);
}

static void
dest_init(nir_dest *dest)
{
   dest->is_ssa = false;
   dest->reg.reg = NULL;
   dest->reg.indirect = NULL;
   dest->reg.base_offset = 0;
}

static void
alu_dest_init(nir_alu_dest *dest)
{
   dest_init(&dest->dest);
   dest->saturate = false;
   dest->write_mask = 0xf;
}

static void
alu_src_init(nir_alu_src *src)
{
   src_init(&src->src);
   src->abs = src->negate = false;
   for (int i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i)
      src->swizzle[i] = i;
}

nir_alu_instr *
nir_alu_instr_create(nir_shader *shader, nir_op op)
{
   unsigned num_srcs = nir_op_infos[op].num_inputs;
   /* TODO: don't use rzalloc */
   nir_alu_instr *instr =
      rzalloc_size(shader,
                   sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));

   instr_init(&instr->instr, nir_instr_type_alu);
   instr->op = op;
   alu_dest_init(&instr->dest);
   for (unsigned i = 0; i < num_srcs; i++)
      alu_src_init(&instr->src[i]);

   return instr;
}

nir_deref_instr *
nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
{
   nir_deref_instr *instr =
      rzalloc_size(shader, sizeof(nir_deref_instr));

   instr_init(&instr->instr, nir_instr_type_deref);

   instr->deref_type = deref_type;
   if (deref_type != nir_deref_type_var)
      src_init(&instr->parent);

   if (deref_type == nir_deref_type_array ||
       deref_type == nir_deref_type_ptr_as_array)
      src_init(&instr->arr.index);

   dest_init(&instr->dest);

   return instr;
}

nir_jump_instr *
nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
{
   nir_jump_instr *instr = ralloc(shader, nir_jump_instr);
   instr_init(&instr->instr, nir_instr_type_jump);
   instr->type = type;
   return instr;
}

nir_load_const_instr *
nir_load_const_instr_create(nir_shader *shader, unsigned num_components,
                            unsigned bit_size)
{
   nir_load_const_instr *instr = rzalloc(shader, nir_load_const_instr);
   instr_init(&instr->instr, nir_instr_type_load_const);

   nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size, NULL);

   return instr;
}

nir_intrinsic_instr *
nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
{
   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   /* TODO: don't use rzalloc */
   nir_intrinsic_instr *instr =
      rzalloc_size(shader,
                   sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));

   instr_init(&instr->instr, nir_instr_type_intrinsic);
   instr->intrinsic = op;

   if (nir_intrinsic_infos[op].has_dest)
      dest_init(&instr->dest);

   for (unsigned i = 0; i < num_srcs; i++)
      src_init(&instr->src[i]);

   return instr;
}

nir_call_instr *
nir_call_instr_create(nir_shader *shader, nir_function *callee)
{
   const unsigned num_params = callee->num_params;
   nir_call_instr *instr =
      rzalloc_size(shader, sizeof(*instr) +
                   num_params * sizeof(instr->params[0]));

   instr_init(&instr->instr, nir_instr_type_call);
   instr->callee = callee;
   instr->num_params = num_params;
   for (unsigned i = 0; i < num_params; i++)
      src_init(&instr->params[i]);

   return instr;
}

nir_tex_instr *
nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
{
   nir_tex_instr *instr = rzalloc(shader, nir_tex_instr);
   instr_init(&instr->instr, nir_instr_type_tex);

   dest_init(&instr->dest);

   instr->num_srcs = num_srcs;
   instr->src = ralloc_array(instr, nir_tex_src, num_srcs);
   for (unsigned i = 0; i < num_srcs; i++)
      src_init(&instr->src[i].src);

   instr->texture_index = 0;
   instr->texture_array_size = 0;
   instr->sampler_index = 0;

   return instr;
}

void
nir_tex_instr_add_src(nir_tex_instr *tex,
                      nir_tex_src_type src_type,
                      nir_src src)
{
   nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src,
                                         tex->num_srcs + 1);

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      new_srcs[i].src_type = tex->src[i].src_type;
      nir_instr_move_src(&tex->instr, &new_srcs[i].src,
                         &tex->src[i].src);
   }

   ralloc_free(tex->src);
   tex->src = new_srcs;

   tex->src[tex->num_srcs].src_type = src_type;
   nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs].src, src);
   tex->num_srcs++;
}

void
nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx)
{
   assert(src_idx < tex->num_srcs);

   /* First rewrite the source to NIR_SRC_INIT */
   nir_instr_rewrite_src(&tex->instr, &tex->src[src_idx].src, NIR_SRC_INIT);

   /* Now, move all of the other sources down */
   for (unsigned i = src_idx + 1; i < tex->num_srcs; i++) {
      tex->src[i-1].src_type = tex->src[i].src_type;
      nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src);
   }
   tex->num_srcs--;
}

nir_phi_instr *
nir_phi_instr_create(nir_shader *shader)
{
   nir_phi_instr *instr = ralloc(shader, nir_phi_instr);
   instr_init(&instr->instr, nir_instr_type_phi);

   dest_init(&instr->dest);
   exec_list_make_empty(&instr->srcs);
   return instr;
}

nir_parallel_copy_instr *
nir_parallel_copy_instr_create(nir_shader *shader)
{
   nir_parallel_copy_instr *instr = ralloc(shader, nir_parallel_copy_instr);
   instr_init(&instr->instr, nir_instr_type_parallel_copy);

   exec_list_make_empty(&instr->entries);

   return instr;
}

nir_ssa_undef_instr *
nir_ssa_undef_instr_create(nir_shader *shader,
                           unsigned num_components,
                           unsigned bit_size)
{
   nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr);
   instr_init(&instr->instr, nir_instr_type_ssa_undef);

   nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size, NULL);

   return instr;
}

static nir_const_value
const_value_float(double d, unsigned bit_size)
{
   nir_const_value v;
   switch (bit_size) {
   case 16: v.u16[0] = _mesa_float_to_half(d); break;
   case 32: v.f32[0] = d; break;
   case 64: v.f64[0] = d; break;
   default:
      unreachable("Invalid bit size");
   }
   return v;
}

static nir_const_value
const_value_int(int64_t i, unsigned bit_size)
{
   nir_const_value v;
   switch (bit_size) {
   case 1: v.b[0] = i & 1; break;
   case 8: v.i8[0] = i; break;
   case 16: v.i16[0] = i; break;
   case 32: v.i32[0] = i; break;
   case 64: v.i64[0] = i; break;
   default:
      unreachable("Invalid bit size");
   }
   return v;
}

nir_const_value
nir_alu_binop_identity(nir_op binop, unsigned bit_size)
{
   const int64_t max_int = (1ull << (bit_size - 1)) - 1;
   const int64_t min_int = -max_int - 1;
   switch (binop) {
   case nir_op_iadd:
      return const_value_int(0, bit_size);
   case nir_op_fadd:
      return const_value_float(0, bit_size);
   case nir_op_imul:
      return const_value_int(1, bit_size);
   case nir_op_fmul:
      return const_value_float(1, bit_size);
   case nir_op_imin:
      return const_value_int(max_int, bit_size);
   case nir_op_umin:
      return const_value_int(~0ull, bit_size);
   case nir_op_fmin:
      return const_value_float(INFINITY, bit_size);
   case nir_op_imax:
      return const_value_int(min_int, bit_size);
   case nir_op_umax:
      return const_value_int(0, bit_size);
   case nir_op_fmax:
      return const_value_float(-INFINITY, bit_size);
   case nir_op_iand:
      return const_value_int(~0ull, bit_size);
   case nir_op_ior:
      return const_value_int(0, bit_size);
   case nir_op_ixor:
      return const_value_int(0, bit_size);
   default:
      unreachable("Invalid reduction operation");
   }
}

nir_function_impl *
nir_cf_node_get_function(nir_cf_node *node)
{
   while (node->type != nir_cf_node_function) {
      node = node->parent;
   }

   return nir_cf_node_as_function(node);
}

/* Reduces a cursor by trying to convert everything to the "after" form and
 * by going up to block granularity when possible.
 */
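/* A worked example (illustrative): a cursor "before instruction I", where I
 * is the first instruction of its block B, reduces to "before block B",
 * while a cursor pointing at an empty block reduces to "after" that block:
 *
 *    nir_cursor c = nir_before_instr(first_instr);
 *    // reduce_cursor(c) yields the same point as nir_before_block(block)
 *
 * This canonical form is what lets nir_cursors_equal() below compare
 * cursors by simple field equality.
 */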
static nir_cursor
reduce_cursor(nir_cursor cursor)
{
   switch (cursor.option) {
   case nir_cursor_before_block:
      assert(nir_cf_node_prev(&cursor.block->cf_node) == NULL ||
             nir_cf_node_prev(&cursor.block->cf_node)->type != nir_cf_node_block);
      if (exec_list_is_empty(&cursor.block->instr_list)) {
         /* Empty block.  After is as good as before. */
         cursor.option = nir_cursor_after_block;
      }
      return cursor;

   case nir_cursor_after_block:
      return cursor;

   case nir_cursor_before_instr: {
      nir_instr *prev_instr = nir_instr_prev(cursor.instr);
      if (prev_instr) {
         /* Before this instruction is after the previous */
         cursor.instr = prev_instr;
         cursor.option = nir_cursor_after_instr;
      } else {
         /* No previous instruction.  Switch to before block */
         cursor.block = cursor.instr->block;
         cursor.option = nir_cursor_before_block;
      }
      return reduce_cursor(cursor);
   }

   case nir_cursor_after_instr:
      if (nir_instr_next(cursor.instr) == NULL) {
         /* This is the last instruction, switch to after block */
         cursor.option = nir_cursor_after_block;
         cursor.block = cursor.instr->block;
      }
      return cursor;

   default:
746 unreachable("Inavlid cursor option");
   }
}

bool
nir_cursors_equal(nir_cursor a, nir_cursor b)
{
   /* Reduced cursors should be unique */
   a = reduce_cursor(a);
   b = reduce_cursor(b);

   return a.block == b.block && a.option == b.option;
}

static bool
add_use_cb(nir_src *src, void *state)
{
   nir_instr *instr = state;

   src->parent_instr = instr;
   list_addtail(&src->use_link,
                src->is_ssa ? &src->ssa->uses : &src->reg.reg->uses);

   return true;
}

static bool
add_ssa_def_cb(nir_ssa_def *def, void *state)
{
   nir_instr *instr = state;

   if (instr->block && def->index == UINT_MAX) {
      nir_function_impl *impl =
         nir_cf_node_get_function(&instr->block->cf_node);

      def->index = impl->ssa_alloc++;
   }

   return true;
}

static bool
add_reg_def_cb(nir_dest *dest, void *state)
{
   nir_instr *instr = state;

   if (!dest->is_ssa) {
      dest->reg.parent_instr = instr;
      list_addtail(&dest->reg.def_link, &dest->reg.reg->defs);
   }

   return true;
}

static void
add_defs_uses(nir_instr *instr)
{
   nir_foreach_src(instr, add_use_cb, instr);
   nir_foreach_dest(instr, add_reg_def_cb, instr);
   nir_foreach_ssa_def(instr, add_ssa_def_cb, instr);
}

void
nir_instr_insert(nir_cursor cursor, nir_instr *instr)
{
   switch (cursor.option) {
   case nir_cursor_before_block:
      /* Only allow inserting jumps into empty blocks. */
      if (instr->type == nir_instr_type_jump)
         assert(exec_list_is_empty(&cursor.block->instr_list));

      instr->block = cursor.block;
      add_defs_uses(instr);
      exec_list_push_head(&cursor.block->instr_list, &instr->node);
      break;
   case nir_cursor_after_block: {
      /* Inserting instructions after a jump is illegal. */
      nir_instr *last = nir_block_last_instr(cursor.block);
      assert(last == NULL || last->type != nir_instr_type_jump);
      (void) last;

      instr->block = cursor.block;
      add_defs_uses(instr);
      exec_list_push_tail(&cursor.block->instr_list, &instr->node);
      break;
   }
   case nir_cursor_before_instr:
      assert(instr->type != nir_instr_type_jump);
      instr->block = cursor.instr->block;
      add_defs_uses(instr);
      exec_node_insert_node_before(&cursor.instr->node, &instr->node);
      break;
   case nir_cursor_after_instr:
      /* Inserting instructions after a jump is illegal. */
      assert(cursor.instr->type != nir_instr_type_jump);

      /* Only allow inserting jumps at the end of the block. */
      if (instr->type == nir_instr_type_jump)
         assert(cursor.instr == nir_block_last_instr(cursor.instr->block));

      instr->block = cursor.instr->block;
      add_defs_uses(instr);
      exec_node_insert_after(&cursor.instr->node, &instr->node);
      break;
   }

   if (instr->type == nir_instr_type_jump)
      nir_handle_add_jump(instr->block);
}

static bool
src_is_valid(const nir_src *src)
{
   return src->is_ssa ? (src->ssa != NULL) : (src->reg.reg != NULL);
}

static bool
remove_use_cb(nir_src *src, void *state)
{
   (void) state;

   if (src_is_valid(src))
      list_del(&src->use_link);

   return true;
}

static bool
remove_def_cb(nir_dest *dest, void *state)
{
   (void) state;

   if (!dest->is_ssa)
      list_del(&dest->reg.def_link);

   return true;
}

static void
remove_defs_uses(nir_instr *instr)
{
   nir_foreach_dest(instr, remove_def_cb, instr);
   nir_foreach_src(instr, remove_use_cb, instr);
}

void nir_instr_remove_v(nir_instr *instr)
{
   remove_defs_uses(instr);
   exec_node_remove(&instr->node);

   if (instr->type == nir_instr_type_jump) {
      nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
      nir_handle_remove_jump(instr->block, jump_instr->type);
   }
}

/*@}*/

void
nir_index_local_regs(nir_function_impl *impl)
{
   unsigned index = 0;
   foreach_list_typed(nir_register, reg, node, &impl->registers) {
      reg->index = index++;
   }
   impl->reg_alloc = index;
}

void
nir_index_global_regs(nir_shader *shader)
{
   unsigned index = 0;
   foreach_list_typed(nir_register, reg, node, &shader->registers) {
      reg->index = index++;
   }
   shader->reg_alloc = index;
}

static bool
visit_alu_dest(nir_alu_instr *instr, nir_foreach_dest_cb cb, void *state)
{
   return cb(&instr->dest.dest, state);
}

static bool
visit_deref_dest(nir_deref_instr *instr, nir_foreach_dest_cb cb, void *state)
{
   return cb(&instr->dest, state);
}

static bool
visit_intrinsic_dest(nir_intrinsic_instr *instr, nir_foreach_dest_cb cb,
                     void *state)
{
   if (nir_intrinsic_infos[instr->intrinsic].has_dest)
      return cb(&instr->dest, state);

   return true;
}

static bool
visit_texture_dest(nir_tex_instr *instr, nir_foreach_dest_cb cb,
                   void *state)
{
   return cb(&instr->dest, state);
}

static bool
visit_phi_dest(nir_phi_instr *instr, nir_foreach_dest_cb cb, void *state)
{
   return cb(&instr->dest, state);
}

static bool
visit_parallel_copy_dest(nir_parallel_copy_instr *instr,
                         nir_foreach_dest_cb cb, void *state)
{
   nir_foreach_parallel_copy_entry(entry, instr) {
      if (!cb(&entry->dest, state))
         return false;
   }

   return true;
}

bool
nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return visit_alu_dest(nir_instr_as_alu(instr), cb, state);
   case nir_instr_type_deref:
      return visit_deref_dest(nir_instr_as_deref(instr), cb, state);
   case nir_instr_type_intrinsic:
      return visit_intrinsic_dest(nir_instr_as_intrinsic(instr), cb, state);
   case nir_instr_type_tex:
      return visit_texture_dest(nir_instr_as_tex(instr), cb, state);
   case nir_instr_type_phi:
      return visit_phi_dest(nir_instr_as_phi(instr), cb, state);
   case nir_instr_type_parallel_copy:
      return visit_parallel_copy_dest(nir_instr_as_parallel_copy(instr),
                                      cb, state);

   case nir_instr_type_load_const:
   case nir_instr_type_ssa_undef:
   case nir_instr_type_call:
   case nir_instr_type_jump:
      break;

   default:
      unreachable("Invalid instruction type");
      break;
   }

   return true;
}

struct foreach_ssa_def_state {
   nir_foreach_ssa_def_cb cb;
   void *client_state;
};

static inline bool
nir_ssa_def_visitor(nir_dest *dest, void *void_state)
{
   struct foreach_ssa_def_state *state = void_state;

   if (dest->is_ssa)
      return state->cb(&dest->ssa, state->client_state);
   else
      return true;
}

bool
nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state)
{
   switch (instr->type) {
   case nir_instr_type_alu:
   case nir_instr_type_deref:
   case nir_instr_type_tex:
   case nir_instr_type_intrinsic:
   case nir_instr_type_phi:
   case nir_instr_type_parallel_copy: {
      struct foreach_ssa_def_state foreach_state = {cb, state};
      return nir_foreach_dest(instr, nir_ssa_def_visitor, &foreach_state);
   }

   case nir_instr_type_load_const:
      return cb(&nir_instr_as_load_const(instr)->def, state);
   case nir_instr_type_ssa_undef:
      return cb(&nir_instr_as_ssa_undef(instr)->def, state);
   case nir_instr_type_call:
   case nir_instr_type_jump:
      return true;
   default:
      unreachable("Invalid instruction type");
   }
}

static bool
visit_src(nir_src *src, nir_foreach_src_cb cb, void *state)
{
   if (!cb(src, state))
      return false;
   if (!src->is_ssa && src->reg.indirect)
      return cb(src->reg.indirect, state);
   return true;
}

static bool
visit_alu_src(nir_alu_instr *instr, nir_foreach_src_cb cb, void *state)
{
   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
      if (!visit_src(&instr->src[i].src, cb, state))
         return false;

   return true;
}

static bool
visit_deref_instr_src(nir_deref_instr *instr,
                      nir_foreach_src_cb cb, void *state)
{
   if (instr->deref_type != nir_deref_type_var) {
      if (!visit_src(&instr->parent, cb, state))
         return false;
   }

   if (instr->deref_type == nir_deref_type_array ||
       instr->deref_type == nir_deref_type_ptr_as_array) {
      if (!visit_src(&instr->arr.index, cb, state))
         return false;
   }

   return true;
}

static bool
visit_tex_src(nir_tex_instr *instr, nir_foreach_src_cb cb, void *state)
{
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      if (!visit_src(&instr->src[i].src, cb, state))
         return false;
   }

   return true;
}

static bool
visit_intrinsic_src(nir_intrinsic_instr *instr, nir_foreach_src_cb cb,
                    void *state)
{
   unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
   for (unsigned i = 0; i < num_srcs; i++) {
      if (!visit_src(&instr->src[i], cb, state))
         return false;
   }

   return true;
}

static bool
visit_call_src(nir_call_instr *instr, nir_foreach_src_cb cb, void *state)
{
   for (unsigned i = 0; i < instr->num_params; i++) {
      if (!visit_src(&instr->params[i], cb, state))
         return false;
   }

   return true;
}

static bool
visit_phi_src(nir_phi_instr *instr, nir_foreach_src_cb cb, void *state)
{
   nir_foreach_phi_src(src, instr) {
      if (!visit_src(&src->src, cb, state))
         return false;
   }

   return true;
}

static bool
visit_parallel_copy_src(nir_parallel_copy_instr *instr,
                        nir_foreach_src_cb cb, void *state)
{
   nir_foreach_parallel_copy_entry(entry, instr) {
      if (!visit_src(&entry->src, cb, state))
         return false;
   }

   return true;
}

typedef struct {
   void *state;
   nir_foreach_src_cb cb;
} visit_dest_indirect_state;

static bool
visit_dest_indirect(nir_dest *dest, void *_state)
{
   visit_dest_indirect_state *state = (visit_dest_indirect_state *) _state;

   if (!dest->is_ssa && dest->reg.indirect)
      return state->cb(dest->reg.indirect, state->state);

   return true;
}

bool
nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      if (!visit_alu_src(nir_instr_as_alu(instr), cb, state))
         return false;
      break;
   case nir_instr_type_deref:
      if (!visit_deref_instr_src(nir_instr_as_deref(instr), cb, state))
         return false;
      break;
   case nir_instr_type_intrinsic:
      if (!visit_intrinsic_src(nir_instr_as_intrinsic(instr), cb, state))
         return false;
      break;
   case nir_instr_type_tex:
      if (!visit_tex_src(nir_instr_as_tex(instr), cb, state))
         return false;
      break;
   case nir_instr_type_call:
      if (!visit_call_src(nir_instr_as_call(instr), cb, state))
         return false;
      break;
   case nir_instr_type_load_const:
      /* Constant load instructions have no regular sources */
      break;
   case nir_instr_type_phi:
      if (!visit_phi_src(nir_instr_as_phi(instr), cb, state))
         return false;
      break;
   case nir_instr_type_parallel_copy:
      if (!visit_parallel_copy_src(nir_instr_as_parallel_copy(instr),
                                   cb, state))
         return false;
      break;
   case nir_instr_type_jump:
   case nir_instr_type_ssa_undef:
      return true;

   default:
      unreachable("Invalid instruction type");
      break;
   }

   visit_dest_indirect_state dest_state;
   dest_state.state = state;
   dest_state.cb = cb;
   return nir_foreach_dest(instr, visit_dest_indirect, &dest_state);
}

int64_t
nir_src_comp_as_int(nir_src src, unsigned comp)
{
   assert(nir_src_is_const(src));
   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   assert(comp < load->def.num_components);
   switch (load->def.bit_size) {
   /* int1_t uses 0/-1 convention */
   case 1: return -(int)load->value.b[comp];
   case 8: return load->value.i8[comp];
   case 16: return load->value.i16[comp];
   case 32: return load->value.i32[comp];
   case 64: return load->value.i64[comp];
   default:
      unreachable("Invalid bit size");
   }
}

uint64_t
nir_src_comp_as_uint(nir_src src, unsigned comp)
{
   assert(nir_src_is_const(src));
   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   assert(comp < load->def.num_components);
   switch (load->def.bit_size) {
   case 1: return load->value.b[comp];
   case 8: return load->value.u8[comp];
   case 16: return load->value.u16[comp];
   case 32: return load->value.u32[comp];
   case 64: return load->value.u64[comp];
   default:
      unreachable("Invalid bit size");
   }
}

bool
nir_src_comp_as_bool(nir_src src, unsigned comp)
{
   int64_t i = nir_src_comp_as_int(src, comp);

   /* Booleans of any size use 0/-1 convention */
   assert(i == 0 || i == -1);

   return i;
}

double
nir_src_comp_as_float(nir_src src, unsigned comp)
{
   assert(nir_src_is_const(src));
   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   assert(comp < load->def.num_components);
   switch (load->def.bit_size) {
   case 16: return _mesa_half_to_float(load->value.u16[comp]);
   case 32: return load->value.f32[comp];
   case 64: return load->value.f64[comp];
   default:
      unreachable("Invalid bit size");
   }
}

int64_t
nir_src_as_int(nir_src src)
{
   assert(nir_src_num_components(src) == 1);
   return nir_src_comp_as_int(src, 0);
}

uint64_t
nir_src_as_uint(nir_src src)
{
   assert(nir_src_num_components(src) == 1);
   return nir_src_comp_as_uint(src, 0);
}

bool
nir_src_as_bool(nir_src src)
{
   assert(nir_src_num_components(src) == 1);
   return nir_src_comp_as_bool(src, 0);
}

double
nir_src_as_float(nir_src src)
{
   assert(nir_src_num_components(src) == 1);
   return nir_src_comp_as_float(src, 0);
}

nir_const_value *
nir_src_as_const_value(nir_src src)
{
   if (!src.is_ssa)
      return NULL;

   if (src.ssa->parent_instr->type != nir_instr_type_load_const)
      return NULL;

   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   return &load->value;
}

/**
 * Returns true if the source is known to be dynamically uniform.  A false
 * return does not mean the source is non-uniform; it only means that
 * uniformity could not be proven.
 */
bool
nir_src_is_dynamically_uniform(nir_src src)
{
   if (!src.is_ssa)
      return false;

   /* Constants are trivially dynamically uniform */
   if (src.ssa->parent_instr->type == nir_instr_type_load_const)
      return true;

   /* As are uniform variables */
   if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr);

      if (intr->intrinsic == nir_intrinsic_load_uniform)
         return true;
   }

   /* XXX: this could have many more tests, such as when a sampler function is
    * called with dynamically uniform arguments.
    */
   return false;
}

static void
src_remove_all_uses(nir_src *src)
{
   for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
      if (!src_is_valid(src))
         continue;

      list_del(&src->use_link);
   }
}

static void
src_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
{
   for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
      if (!src_is_valid(src))
         continue;

      if (parent_instr) {
         src->parent_instr = parent_instr;
         if (src->is_ssa)
            list_addtail(&src->use_link, &src->ssa->uses);
         else
            list_addtail(&src->use_link, &src->reg.reg->uses);
      } else {
         assert(parent_if);
         src->parent_if = parent_if;
         if (src->is_ssa)
            list_addtail(&src->use_link, &src->ssa->if_uses);
         else
            list_addtail(&src->use_link, &src->reg.reg->if_uses);
      }
   }
}

void
nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src)
{
   assert(!src_is_valid(src) || src->parent_instr == instr);

   src_remove_all_uses(src);
   *src = new_src;
   src_add_all_uses(src, instr, NULL);
}

void
nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src)
{
   assert(!src_is_valid(dest) || dest->parent_instr == dest_instr);

   src_remove_all_uses(dest);
   src_remove_all_uses(src);
   *dest = *src;
   *src = NIR_SRC_INIT;
   src_add_all_uses(dest, dest_instr, NULL);
}

void
nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src)
{
   nir_src *src = &if_stmt->condition;
   assert(!src_is_valid(src) || src->parent_if == if_stmt);

   src_remove_all_uses(src);
   *src = new_src;
   src_add_all_uses(src, NULL, if_stmt);
}

void
nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest)
{
   if (dest->is_ssa) {
      /* We can only overwrite an SSA destination if it has no uses. */
      assert(list_empty(&dest->ssa.uses) && list_empty(&dest->ssa.if_uses));
   } else {
      list_del(&dest->reg.def_link);
      if (dest->reg.indirect)
         src_remove_all_uses(dest->reg.indirect);
   }

   /* We can't rewrite with an SSA def */
   assert(!new_dest.is_ssa);

   nir_dest_copy(dest, &new_dest, instr);

   dest->reg.parent_instr = instr;
   list_addtail(&dest->reg.def_link, &new_dest.reg.reg->defs);

   if (dest->reg.indirect)
      src_add_all_uses(dest->reg.indirect, instr, NULL);
}

/* note: does *not* take ownership of 'name' */
void
nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
                 unsigned num_components,
                 unsigned bit_size, const char *name)
{
   def->name = ralloc_strdup(instr, name);
   def->parent_instr = instr;
   list_inithead(&def->uses);
   list_inithead(&def->if_uses);
   def->num_components = num_components;
   def->bit_size = bit_size;

   if (instr->block) {
      nir_function_impl *impl =
         nir_cf_node_get_function(&instr->block->cf_node);

      def->index = impl->ssa_alloc++;
   } else {
      def->index = UINT_MAX;
   }
}

/* note: does *not* take ownership of 'name' */
void
nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
                  unsigned num_components, unsigned bit_size,
                  const char *name)
{
   dest->is_ssa = true;
   nir_ssa_def_init(instr, &dest->ssa, num_components, bit_size, name);
}

void
nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src)
{
   assert(!new_src.is_ssa || def != new_src.ssa);

   nir_foreach_use_safe(use_src, def)
      nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);

   nir_foreach_if_use_safe(use_src, def)
      nir_if_rewrite_condition(use_src->parent_if, new_src);
}

static bool
is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between)
{
   assert(start->block == end->block);

   if (between->block != start->block)
      return false;

   /* Search backwards looking for "between" */
   while (start != end) {
      if (between == end)
         return true;

      end = nir_instr_prev(end);
      assert(end);
   }

   return false;
}

/* Replaces all uses of the given SSA def with the given source, but only if
 * the use comes after the after_me instruction.  This is useful when you are
 * emitting code to fix up the result of some instruction: you can freely use
 * the result in that code, then call rewrite_uses_after with the last fixup
 * instruction as after_me, and it will replace all of the uses you want
 * without touching the fixup code.
 *
 * This function assumes that after_me is in the same block as
 * def->parent_instr and that after_me comes after def->parent_instr.
 */
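/* A minimal sketch of the fixup pattern described above (illustrative;
 * assumes a nir_builder "b" positioned right after def->parent_instr and
 * in-scope nir_ssa_def pointers "def" and "scale"):
 *
 *    nir_ssa_def *fixed = nir_fmul(b, def, scale);
 *    nir_ssa_def_rewrite_uses_after(def, nir_src_for_ssa(fixed),
 *                                   fixed->parent_instr);
 *
 * The fmul itself keeps reading the original def; every use after it is
 * redirected to the fixed-up value.
 */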
void
nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src,
                               nir_instr *after_me)
{
   if (new_src.is_ssa && def == new_src.ssa)
      return;

   nir_foreach_use_safe(use_src, def) {
      assert(use_src->parent_instr != def->parent_instr);
      /* Since def already dominates all of its uses, the only way a use can
       * not be dominated by after_me is if it is between def and after_me in
       * the instruction list.
       */
      if (!is_instr_between(def->parent_instr, after_me, use_src->parent_instr))
         nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
   }

   nir_foreach_if_use_safe(use_src, def)
      nir_if_rewrite_condition(use_src->parent_if, new_src);
}

nir_component_mask_t
nir_ssa_def_components_read(const nir_ssa_def *def)
{
   nir_component_mask_t read_mask = 0;
   nir_foreach_use(use, def) {
      if (use->parent_instr->type == nir_instr_type_alu) {
         nir_alu_instr *alu = nir_instr_as_alu(use->parent_instr);
         nir_alu_src *alu_src = exec_node_data(nir_alu_src, use, src);
         int src_idx = alu_src - &alu->src[0];
         assert(src_idx >= 0 && src_idx < nir_op_infos[alu->op].num_inputs);
         read_mask |= nir_alu_instr_src_read_mask(alu, src_idx);
      } else {
         return (1 << def->num_components) - 1;
      }
   }

   if (!list_empty(&def->if_uses))
      read_mask |= 1;

   return read_mask;
}

nir_block *
nir_block_cf_tree_next(nir_block *block)
{
   if (block == NULL) {
      /* nir_foreach_block_safe() will call this function on a NULL block
       * after the last iteration, but it won't use the result so just return
       * NULL here.
       */
      return NULL;
   }

   nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
   if (cf_next)
      return nir_cf_node_cf_tree_first(cf_next);

   nir_cf_node *parent = block->cf_node.parent;

   switch (parent->type) {
   case nir_cf_node_if: {
      /* Are we at the end of the if? Go to the beginning of the else */
      nir_if *if_stmt = nir_cf_node_as_if(parent);
      if (block == nir_if_last_then_block(if_stmt))
         return nir_if_first_else_block(if_stmt);

      assert(block == nir_if_last_else_block(if_stmt));
      /* fall through */
   }

   case nir_cf_node_loop:
      return nir_cf_node_as_block(nir_cf_node_next(parent));

   case nir_cf_node_function:
      return NULL;

   default:
      unreachable("unknown cf node type");
   }
}

nir_block *
nir_block_cf_tree_prev(nir_block *block)
{
   if (block == NULL) {
      /* do this for consistency with nir_block_cf_tree_next() */
      return NULL;
   }

   nir_cf_node *cf_prev = nir_cf_node_prev(&block->cf_node);
   if (cf_prev)
      return nir_cf_node_cf_tree_last(cf_prev);

   nir_cf_node *parent = block->cf_node.parent;

   switch (parent->type) {
   case nir_cf_node_if: {
      /* Are we at the beginning of the else? Go to the end of the if */
      nir_if *if_stmt = nir_cf_node_as_if(parent);
      if (block == nir_if_first_else_block(if_stmt))
         return nir_if_last_then_block(if_stmt);

      assert(block == nir_if_first_then_block(if_stmt));
      /* fall through */
   }

   case nir_cf_node_loop:
      return nir_cf_node_as_block(nir_cf_node_prev(parent));

   case nir_cf_node_function:
      return NULL;

   default:
      unreachable("unknown cf node type");
   }
}

nir_block *nir_cf_node_cf_tree_first(nir_cf_node *node)
{
   switch (node->type) {
   case nir_cf_node_function: {
      nir_function_impl *impl = nir_cf_node_as_function(node);
      return nir_start_block(impl);
   }

   case nir_cf_node_if: {
      nir_if *if_stmt = nir_cf_node_as_if(node);
      return nir_if_first_then_block(if_stmt);
   }

   case nir_cf_node_loop: {
      nir_loop *loop = nir_cf_node_as_loop(node);
      return nir_loop_first_block(loop);
   }

   case nir_cf_node_block: {
      return nir_cf_node_as_block(node);
   }

   default:
      unreachable("unknown node type");
   }
}

nir_block *nir_cf_node_cf_tree_last(nir_cf_node *node)
{
   switch (node->type) {
   case nir_cf_node_function: {
      nir_function_impl *impl = nir_cf_node_as_function(node);
      return nir_impl_last_block(impl);
   }

   case nir_cf_node_if: {
      nir_if *if_stmt = nir_cf_node_as_if(node);
      return nir_if_last_else_block(if_stmt);
   }

   case nir_cf_node_loop: {
      nir_loop *loop = nir_cf_node_as_loop(node);
      return nir_loop_last_block(loop);
   }

   case nir_cf_node_block: {
      return nir_cf_node_as_block(node);
   }

   default:
      unreachable("unknown node type");
   }
}

nir_block *nir_cf_node_cf_tree_next(nir_cf_node *node)
{
   if (node->type == nir_cf_node_block)
      return nir_block_cf_tree_next(nir_cf_node_as_block(node));
   else if (node->type == nir_cf_node_function)
      return NULL;
   else
      return nir_cf_node_as_block(nir_cf_node_next(node));
}

nir_if *
nir_block_get_following_if(nir_block *block)
{
   if (exec_node_is_tail_sentinel(&block->cf_node.node))
      return NULL;

   if (nir_cf_node_is_last(&block->cf_node))
      return NULL;

   nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);

   if (next_node->type != nir_cf_node_if)
      return NULL;

   return nir_cf_node_as_if(next_node);
}

nir_loop *
nir_block_get_following_loop(nir_block *block)
{
   if (exec_node_is_tail_sentinel(&block->cf_node.node))
      return NULL;

   if (nir_cf_node_is_last(&block->cf_node))
      return NULL;

   nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);

   if (next_node->type != nir_cf_node_loop)
      return NULL;

   return nir_cf_node_as_loop(next_node);
}

void
nir_index_blocks(nir_function_impl *impl)
{
   unsigned index = 0;

   if (impl->valid_metadata & nir_metadata_block_index)
      return;

   nir_foreach_block(block, impl) {
      block->index = index++;
   }

   /* The end_block isn't really part of the program, which is why its index
    * is >= num_blocks.
    */
   impl->num_blocks = impl->end_block->index = index;
}

static bool
index_ssa_def_cb(nir_ssa_def *def, void *state)
{
   unsigned *index = (unsigned *) state;
   def->index = (*index)++;

   return true;
}

/**
 * The indices are applied top-to-bottom which has the very nice property
 * that, if A dominates B, then A->index <= B->index.
 */
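/* A sketch of how this property can be exploited (illustrative, not from
 * this file): since A dominating B implies A->index <= B->index, the
 * contrapositive gives a cheap early-out when testing whether def "a"
 * dominates def "b":
 *
 *    if (a->index > b->index)
 *       return false;   // a cannot dominate b
 *
 * Only when the index test passes is a real dominance query needed.
 */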
void
nir_index_ssa_defs(nir_function_impl *impl)
{
   unsigned index = 0;

   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block)
         nir_foreach_ssa_def(instr, index_ssa_def_cb, &index);
   }

   impl->ssa_alloc = index;
}

/**
 * The indices are applied top-to-bottom which has the very nice property
 * that, if A dominates B, then A->index <= B->index.
 */
unsigned
nir_index_instrs(nir_function_impl *impl)
{
   unsigned index = 0;

   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block)
         instr->index = index++;
   }

   return index;
}

nir_intrinsic_op
nir_intrinsic_from_system_value(gl_system_value val)
{
   switch (val) {
   case SYSTEM_VALUE_VERTEX_ID:
      return nir_intrinsic_load_vertex_id;
   case SYSTEM_VALUE_INSTANCE_ID:
      return nir_intrinsic_load_instance_id;
   case SYSTEM_VALUE_DRAW_ID:
      return nir_intrinsic_load_draw_id;
   case SYSTEM_VALUE_BASE_INSTANCE:
      return nir_intrinsic_load_base_instance;
   case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
      return nir_intrinsic_load_vertex_id_zero_base;
   case SYSTEM_VALUE_IS_INDEXED_DRAW:
      return nir_intrinsic_load_is_indexed_draw;
   case SYSTEM_VALUE_FIRST_VERTEX:
      return nir_intrinsic_load_first_vertex;
   case SYSTEM_VALUE_BASE_VERTEX:
      return nir_intrinsic_load_base_vertex;
   case SYSTEM_VALUE_INVOCATION_ID:
      return nir_intrinsic_load_invocation_id;
   case SYSTEM_VALUE_FRAG_COORD:
      return nir_intrinsic_load_frag_coord;
   case SYSTEM_VALUE_FRONT_FACE:
      return nir_intrinsic_load_front_face;
   case SYSTEM_VALUE_SAMPLE_ID:
      return nir_intrinsic_load_sample_id;
   case SYSTEM_VALUE_SAMPLE_POS:
      return nir_intrinsic_load_sample_pos;
   case SYSTEM_VALUE_SAMPLE_MASK_IN:
      return nir_intrinsic_load_sample_mask_in;
   case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
      return nir_intrinsic_load_local_invocation_id;
   case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
      return nir_intrinsic_load_local_invocation_index;
   case SYSTEM_VALUE_WORK_GROUP_ID:
      return nir_intrinsic_load_work_group_id;
   case SYSTEM_VALUE_NUM_WORK_GROUPS:
      return nir_intrinsic_load_num_work_groups;
   case SYSTEM_VALUE_PRIMITIVE_ID:
      return nir_intrinsic_load_primitive_id;
   case SYSTEM_VALUE_TESS_COORD:
      return nir_intrinsic_load_tess_coord;
   case SYSTEM_VALUE_TESS_LEVEL_OUTER:
      return nir_intrinsic_load_tess_level_outer;
   case SYSTEM_VALUE_TESS_LEVEL_INNER:
      return nir_intrinsic_load_tess_level_inner;
   case SYSTEM_VALUE_VERTICES_IN:
      return nir_intrinsic_load_patch_vertices_in;
   case SYSTEM_VALUE_HELPER_INVOCATION:
      return nir_intrinsic_load_helper_invocation;
   case SYSTEM_VALUE_VIEW_INDEX:
      return nir_intrinsic_load_view_index;
   case SYSTEM_VALUE_SUBGROUP_SIZE:
      return nir_intrinsic_load_subgroup_size;
   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
      return nir_intrinsic_load_subgroup_invocation;
   case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
      return nir_intrinsic_load_subgroup_eq_mask;
   case SYSTEM_VALUE_SUBGROUP_GE_MASK:
      return nir_intrinsic_load_subgroup_ge_mask;
   case SYSTEM_VALUE_SUBGROUP_GT_MASK:
      return nir_intrinsic_load_subgroup_gt_mask;
   case SYSTEM_VALUE_SUBGROUP_LE_MASK:
      return nir_intrinsic_load_subgroup_le_mask;
   case SYSTEM_VALUE_SUBGROUP_LT_MASK:
      return nir_intrinsic_load_subgroup_lt_mask;
   case SYSTEM_VALUE_NUM_SUBGROUPS:
      return nir_intrinsic_load_num_subgroups;
   case SYSTEM_VALUE_SUBGROUP_ID:
      return nir_intrinsic_load_subgroup_id;
   case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
      return nir_intrinsic_load_local_group_size;
   case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
      return nir_intrinsic_load_global_invocation_id;
   case SYSTEM_VALUE_WORK_DIM:
      return nir_intrinsic_load_work_dim;
   default:
      unreachable("system value does not directly correspond to intrinsic");
   }
}

gl_system_value
nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
{
   switch (intrin) {
   case nir_intrinsic_load_vertex_id:
      return SYSTEM_VALUE_VERTEX_ID;
   case nir_intrinsic_load_instance_id:
      return SYSTEM_VALUE_INSTANCE_ID;
   case nir_intrinsic_load_draw_id:
      return SYSTEM_VALUE_DRAW_ID;
   case nir_intrinsic_load_base_instance:
      return SYSTEM_VALUE_BASE_INSTANCE;
   case nir_intrinsic_load_vertex_id_zero_base:
      return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
   case nir_intrinsic_load_first_vertex:
      return SYSTEM_VALUE_FIRST_VERTEX;
   case nir_intrinsic_load_is_indexed_draw:
      return SYSTEM_VALUE_IS_INDEXED_DRAW;
   case nir_intrinsic_load_base_vertex:
      return SYSTEM_VALUE_BASE_VERTEX;
   case nir_intrinsic_load_invocation_id:
      return SYSTEM_VALUE_INVOCATION_ID;
   case nir_intrinsic_load_frag_coord:
      return SYSTEM_VALUE_FRAG_COORD;
   case nir_intrinsic_load_front_face:
      return SYSTEM_VALUE_FRONT_FACE;
   case nir_intrinsic_load_sample_id:
      return SYSTEM_VALUE_SAMPLE_ID;
   case nir_intrinsic_load_sample_pos:
      return SYSTEM_VALUE_SAMPLE_POS;
   case nir_intrinsic_load_sample_mask_in:
      return SYSTEM_VALUE_SAMPLE_MASK_IN;
   case nir_intrinsic_load_local_invocation_id:
      return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
   case nir_intrinsic_load_local_invocation_index:
      return SYSTEM_VALUE_LOCAL_INVOCATION_INDEX;
   case nir_intrinsic_load_num_work_groups:
      return SYSTEM_VALUE_NUM_WORK_GROUPS;
   case nir_intrinsic_load_work_group_id:
      return SYSTEM_VALUE_WORK_GROUP_ID;
   case nir_intrinsic_load_primitive_id:
      return SYSTEM_VALUE_PRIMITIVE_ID;
   case nir_intrinsic_load_tess_coord:
      return SYSTEM_VALUE_TESS_COORD;
   case nir_intrinsic_load_tess_level_outer:
      return SYSTEM_VALUE_TESS_LEVEL_OUTER;
   case nir_intrinsic_load_tess_level_inner:
      return SYSTEM_VALUE_TESS_LEVEL_INNER;
   case nir_intrinsic_load_patch_vertices_in:
      return SYSTEM_VALUE_VERTICES_IN;
   case nir_intrinsic_load_helper_invocation:
      return SYSTEM_VALUE_HELPER_INVOCATION;
   case nir_intrinsic_load_view_index:
      return SYSTEM_VALUE_VIEW_INDEX;
   case nir_intrinsic_load_subgroup_size:
      return SYSTEM_VALUE_SUBGROUP_SIZE;
   case nir_intrinsic_load_subgroup_invocation:
      return SYSTEM_VALUE_SUBGROUP_INVOCATION;
   case nir_intrinsic_load_subgroup_eq_mask:
      return SYSTEM_VALUE_SUBGROUP_EQ_MASK;
   case nir_intrinsic_load_subgroup_ge_mask:
      return SYSTEM_VALUE_SUBGROUP_GE_MASK;
   case nir_intrinsic_load_subgroup_gt_mask:
      return SYSTEM_VALUE_SUBGROUP_GT_MASK;
   case nir_intrinsic_load_subgroup_le_mask:
      return SYSTEM_VALUE_SUBGROUP_LE_MASK;
   case nir_intrinsic_load_subgroup_lt_mask:
      return SYSTEM_VALUE_SUBGROUP_LT_MASK;
   case nir_intrinsic_load_num_subgroups:
      return SYSTEM_VALUE_NUM_SUBGROUPS;
   case nir_intrinsic_load_subgroup_id:
      return SYSTEM_VALUE_SUBGROUP_ID;
   case nir_intrinsic_load_local_group_size:
      return SYSTEM_VALUE_LOCAL_GROUP_SIZE;
   case nir_intrinsic_load_global_invocation_id:
      return SYSTEM_VALUE_GLOBAL_INVOCATION_ID;
   default:
      unreachable("intrinsic doesn't produce a system value");
   }
}

/* OpenGL utility method that remaps attribute locations when they are
 * doubles.  Not needed for Vulkan because of the differences in input
 * location counts for doubles between Vulkan and OpenGL.
 *
 * The bitfield returned in dual_slot is one bit for each double input slot in
 * the original OpenGL single-slot input numbering.  The mapping from old
 * locations to new locations is as follows:
 *
 *    new_loc = loc + util_bitcount(dual_slot & BITFIELD64_MASK(loc))
 */
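/* A worked example (illustrative): suppose the input at location 0 is a
 * dvec3 (dual-slot) and there is a second input at location 1.  Then
 * dual_slot has bit 0 set, and the second input moves to
 *
 *    new_loc = 1 + util_bitcount(0x1 & BITFIELD64_MASK(1)) = 2
 *
 * while the double itself stays at location 0 and now also covers slot 1.
 */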
void
nir_remap_dual_slot_attributes(nir_shader *shader, uint64_t *dual_slot)
{
   assert(shader->info.stage == MESA_SHADER_VERTEX);

   *dual_slot = 0;
   nir_foreach_variable(var, &shader->inputs) {
      if (glsl_type_is_dual_slot(glsl_without_array(var->type))) {
         unsigned slots = glsl_count_attribute_slots(var->type, true);
         *dual_slot |= BITFIELD64_MASK(slots) << var->data.location;
      }
   }

   nir_foreach_variable(var, &shader->inputs) {
      var->data.location +=
         util_bitcount64(*dual_slot & BITFIELD64_MASK(var->data.location));
   }
}

/* Returns an attribute mask that has been re-compacted using the given
 * dual_slot mask.
 */
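/* A worked example (illustrative), continuing the dvec3-at-location-0 case
 * above: with dual_slot = 0x1, an expanded mask 0b111 (the dvec3 in slots
 * 0-1 plus the attribute now at slot 2) compacts back to 0b11, i.e. the
 * original single-slot locations 0 and 1:
 *
 *    loc = 0, mask = BITFIELD64_MASK(1) = 0x1
 *    attribs = (0b111 & 0x1) | ((0b111 & ~0x1) >> 1) = 0b11
 */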
uint64_t
nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot)
{
   while (dual_slot) {
      unsigned loc = u_bit_scan64(&dual_slot);
      /* mask of all bits up to and including loc */
      uint64_t mask = BITFIELD64_MASK(loc + 1);
      attribs = (attribs & mask) | ((attribs & ~mask) >> 1);
   }
   return attribs;
}