nir: detect more dynamically uniform expressions
src/compiler/nir/nir.c (mesa.git)
1 /*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Connor Abbott (cwabbott0@gmail.com)
25 *
26 */
27
28 #include "nir.h"
29 #include "nir_control_flow_private.h"
30 #include "util/half_float.h"
31 #include <limits.h>
32 #include <assert.h>
33 #include <math.h>
34 #include "util/u_math.h"
35
36 #include "main/menums.h" /* BITFIELD64_MASK */
37
38 nir_shader *
39 nir_shader_create(void *mem_ctx,
40 gl_shader_stage stage,
41 const nir_shader_compiler_options *options,
42 shader_info *si)
43 {
44 nir_shader *shader = rzalloc(mem_ctx, nir_shader);
45
46 exec_list_make_empty(&shader->uniforms);
47 exec_list_make_empty(&shader->inputs);
48 exec_list_make_empty(&shader->outputs);
49 exec_list_make_empty(&shader->shared);
50
51 shader->options = options;
52
53 if (si) {
54 assert(si->stage == stage);
55 shader->info = *si;
56 } else {
57 shader->info.stage = stage;
58 }
59
60 exec_list_make_empty(&shader->functions);
61 exec_list_make_empty(&shader->globals);
62 exec_list_make_empty(&shader->system_values);
63
64 shader->num_inputs = 0;
65 shader->num_outputs = 0;
66 shader->num_uniforms = 0;
67 shader->num_shared = 0;
68
69 return shader;
70 }
71
72 static nir_register *
73 reg_create(void *mem_ctx, struct exec_list *list)
74 {
75 nir_register *reg = ralloc(mem_ctx, nir_register);
76
77 list_inithead(&reg->uses);
78 list_inithead(&reg->defs);
79 list_inithead(&reg->if_uses);
80
81 reg->num_components = 0;
82 reg->bit_size = 32;
83 reg->num_array_elems = 0;
84 reg->name = NULL;
85
86 exec_list_push_tail(list, &reg->node);
87
88 return reg;
89 }
90
91 nir_register *
92 nir_local_reg_create(nir_function_impl *impl)
93 {
94 nir_register *reg = reg_create(ralloc_parent(impl), &impl->registers);
95 reg->index = impl->reg_alloc++;
96
97 return reg;
98 }
99
100 void
101 nir_reg_remove(nir_register *reg)
102 {
103 exec_node_remove(&reg->node);
104 }
105
106 void
107 nir_shader_add_variable(nir_shader *shader, nir_variable *var)
108 {
109 switch (var->data.mode) {
110 case nir_var_all:
111 assert(!"invalid mode");
112 break;
113
114 case nir_var_function_temp:
115 assert(!"nir_shader_add_variable cannot be used for local variables");
116 break;
117
118 case nir_var_shader_temp:
119 exec_list_push_tail(&shader->globals, &var->node);
120 break;
121
122 case nir_var_shader_in:
123 exec_list_push_tail(&shader->inputs, &var->node);
124 break;
125
126 case nir_var_shader_out:
127 exec_list_push_tail(&shader->outputs, &var->node);
128 break;
129
130 case nir_var_uniform:
131 case nir_var_mem_ubo:
132 case nir_var_mem_ssbo:
133 exec_list_push_tail(&shader->uniforms, &var->node);
134 break;
135
136 case nir_var_mem_shared:
137 assert(gl_shader_stage_is_compute(shader->info.stage));
138 exec_list_push_tail(&shader->shared, &var->node);
139 break;
140
141 case nir_var_mem_global:
142 assert(!"nir_shader_add_variable cannot be used for global memory");
143 break;
144
145 case nir_var_system_value:
146 exec_list_push_tail(&shader->system_values, &var->node);
147 break;
148 }
149 }
150
151 nir_variable *
152 nir_variable_create(nir_shader *shader, nir_variable_mode mode,
153 const struct glsl_type *type, const char *name)
154 {
155 nir_variable *var = rzalloc(shader, nir_variable);
156 var->name = ralloc_strdup(var, name);
157 var->type = type;
158 var->data.mode = mode;
159 var->data.how_declared = nir_var_declared_normally;
160
161 if ((mode == nir_var_shader_in &&
162 shader->info.stage != MESA_SHADER_VERTEX) ||
163 (mode == nir_var_shader_out &&
164 shader->info.stage != MESA_SHADER_FRAGMENT))
165 var->data.interpolation = INTERP_MODE_SMOOTH;
166
167 if (mode == nir_var_shader_in || mode == nir_var_uniform)
168 var->data.read_only = true;
169
170 nir_shader_add_variable(shader, var);
171
172 return var;
173 }
174
175 nir_variable *
176 nir_local_variable_create(nir_function_impl *impl,
177 const struct glsl_type *type, const char *name)
178 {
179 nir_variable *var = rzalloc(impl->function->shader, nir_variable);
180 var->name = ralloc_strdup(var, name);
181 var->type = type;
182 var->data.mode = nir_var_function_temp;
183
184 nir_function_impl_add_variable(impl, var);
185
186 return var;
187 }
188
189 nir_function *
190 nir_function_create(nir_shader *shader, const char *name)
191 {
192 nir_function *func = ralloc(shader, nir_function);
193
194 exec_list_push_tail(&shader->functions, &func->node);
195
196 func->name = ralloc_strdup(func, name);
197 func->shader = shader;
198 func->num_params = 0;
199 func->params = NULL;
200 func->impl = NULL;
201 func->is_entrypoint = false;
202
203 return func;
204 }
205
206 /* NOTE: if the instruction you are copying a src to is already added
207 * to the IR, use nir_instr_rewrite_src() instead.
208 */
209 void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx)
210 {
211 dest->is_ssa = src->is_ssa;
212 if (src->is_ssa) {
213 dest->ssa = src->ssa;
214 } else {
215 dest->reg.base_offset = src->reg.base_offset;
216 dest->reg.reg = src->reg.reg;
217 if (src->reg.indirect) {
218 dest->reg.indirect = ralloc(mem_ctx, nir_src);
219 nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx);
220 } else {
221 dest->reg.indirect = NULL;
222 }
223 }
224 }
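/* Illustrative sketch (not from the original file): per the note above,
 * nir_src_copy() is only for instructions that have not been inserted yet.
 * Once an instruction is in the IR, its sources must go through
 * nir_instr_rewrite_src() so the use lists stay consistent, e.g. for a
 * hypothetical intrinsic "intrin" and replacement def "new_def":
 *
 *    nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
 *                          nir_src_for_ssa(new_def));
 */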
225
226 void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr)
227 {
228 /* Copying an SSA definition makes no sense whatsoever. */
229 assert(!src->is_ssa);
230
231 dest->is_ssa = false;
232
233 dest->reg.base_offset = src->reg.base_offset;
234 dest->reg.reg = src->reg.reg;
235 if (src->reg.indirect) {
236 dest->reg.indirect = ralloc(instr, nir_src);
237 nir_src_copy(dest->reg.indirect, src->reg.indirect, instr);
238 } else {
239 dest->reg.indirect = NULL;
240 }
241 }
242
243 void
244 nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
245 nir_alu_instr *instr)
246 {
247 nir_src_copy(&dest->src, &src->src, &instr->instr);
248 dest->abs = src->abs;
249 dest->negate = src->negate;
250 for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
251 dest->swizzle[i] = src->swizzle[i];
252 }
253
254 void
255 nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
256 nir_alu_instr *instr)
257 {
258 nir_dest_copy(&dest->dest, &src->dest, &instr->instr);
259 dest->write_mask = src->write_mask;
260 dest->saturate = src->saturate;
261 }
262
263
264 static void
265 cf_init(nir_cf_node *node, nir_cf_node_type type)
266 {
267 exec_node_init(&node->node);
268 node->parent = NULL;
269 node->type = type;
270 }
271
272 nir_function_impl *
273 nir_function_impl_create_bare(nir_shader *shader)
274 {
275 nir_function_impl *impl = ralloc(shader, nir_function_impl);
276
277 impl->function = NULL;
278
279 cf_init(&impl->cf_node, nir_cf_node_function);
280
281 exec_list_make_empty(&impl->body);
282 exec_list_make_empty(&impl->registers);
283 exec_list_make_empty(&impl->locals);
284 impl->reg_alloc = 0;
285 impl->ssa_alloc = 0;
286 impl->valid_metadata = nir_metadata_none;
287
288 /* create start & end blocks */
289 nir_block *start_block = nir_block_create(shader);
290 nir_block *end_block = nir_block_create(shader);
291 start_block->cf_node.parent = &impl->cf_node;
292 end_block->cf_node.parent = &impl->cf_node;
293 impl->end_block = end_block;
294
295 exec_list_push_tail(&impl->body, &start_block->cf_node.node);
296
297 start_block->successors[0] = end_block;
298 _mesa_set_add(end_block->predecessors, start_block);
299 return impl;
300 }
301
302 nir_function_impl *
303 nir_function_impl_create(nir_function *function)
304 {
305 assert(function->impl == NULL);
306
307 nir_function_impl *impl = nir_function_impl_create_bare(function->shader);
308
309 function->impl = impl;
310 impl->function = function;
311
312 return impl;
313 }
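/* Illustrative sketch (not from the original file): the typical sequence for
 * setting up a shader from scratch, assuming "options" points at the
 * backend's nir_shader_compiler_options:
 *
 *    nir_shader *s = nir_shader_create(NULL, MESA_SHADER_FRAGMENT, options, NULL);
 *    nir_function *fn = nir_function_create(s, "main");
 *    fn->is_entrypoint = true;
 *    nir_function_impl *impl = nir_function_impl_create(fn);
 *
 * Instructions can then be inserted into impl, starting at its start block.
 */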
314
315 nir_block *
316 nir_block_create(nir_shader *shader)
317 {
318 nir_block *block = rzalloc(shader, nir_block);
319
320 cf_init(&block->cf_node, nir_cf_node_block);
321
322 block->successors[0] = block->successors[1] = NULL;
323 block->predecessors = _mesa_pointer_set_create(block);
324 block->imm_dom = NULL;
325 /* XXX maybe it would be worth it to defer allocation? This
326 * way it doesn't get allocated for shader refs that never run
327 * nir_calc_dominance? For example, state-tracker creates an
328 * initial IR, clones that, runs appropriate lowering pass, passes
329 * to driver which does common lowering/opt, and then stores ref
330 * which is later used to do state specific lowering and further
331 * opt. Do any of the references not need dominance metadata?
332 */
333 block->dom_frontier = _mesa_pointer_set_create(block);
334
335 exec_list_make_empty(&block->instr_list);
336
337 return block;
338 }
339
340 static inline void
341 src_init(nir_src *src)
342 {
343 src->is_ssa = false;
344 src->reg.reg = NULL;
345 src->reg.indirect = NULL;
346 src->reg.base_offset = 0;
347 }
348
349 nir_if *
350 nir_if_create(nir_shader *shader)
351 {
352 nir_if *if_stmt = ralloc(shader, nir_if);
353
354 if_stmt->control = nir_selection_control_none;
355
356 cf_init(&if_stmt->cf_node, nir_cf_node_if);
357 src_init(&if_stmt->condition);
358
359 nir_block *then = nir_block_create(shader);
360 exec_list_make_empty(&if_stmt->then_list);
361 exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node);
362 then->cf_node.parent = &if_stmt->cf_node;
363
364 nir_block *else_stmt = nir_block_create(shader);
365 exec_list_make_empty(&if_stmt->else_list);
366 exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node);
367 else_stmt->cf_node.parent = &if_stmt->cf_node;
368
369 return if_stmt;
370 }
371
372 nir_loop *
373 nir_loop_create(nir_shader *shader)
374 {
375 nir_loop *loop = rzalloc(shader, nir_loop);
376
377 cf_init(&loop->cf_node, nir_cf_node_loop);
378
379 nir_block *body = nir_block_create(shader);
380 exec_list_make_empty(&loop->body);
381 exec_list_push_tail(&loop->body, &body->cf_node.node);
382 body->cf_node.parent = &loop->cf_node;
383
384 body->successors[0] = body;
385 _mesa_set_add(body->predecessors, body);
386
387 return loop;
388 }
389
390 static void
391 instr_init(nir_instr *instr, nir_instr_type type)
392 {
393 instr->type = type;
394 instr->block = NULL;
395 exec_node_init(&instr->node);
396 }
397
398 static void
399 dest_init(nir_dest *dest)
400 {
401 dest->is_ssa = false;
402 dest->reg.reg = NULL;
403 dest->reg.indirect = NULL;
404 dest->reg.base_offset = 0;
405 }
406
407 static void
408 alu_dest_init(nir_alu_dest *dest)
409 {
410 dest_init(&dest->dest);
411 dest->saturate = false;
412 dest->write_mask = 0xf;
413 }
414
415 static void
416 alu_src_init(nir_alu_src *src)
417 {
418 src_init(&src->src);
419 src->abs = src->negate = false;
420 for (int i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i)
421 src->swizzle[i] = i;
422 }
423
424 nir_alu_instr *
425 nir_alu_instr_create(nir_shader *shader, nir_op op)
426 {
427 unsigned num_srcs = nir_op_infos[op].num_inputs;
428 /* TODO: don't use rzalloc */
429 nir_alu_instr *instr =
430 rzalloc_size(shader,
431 sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));
432
433 instr_init(&instr->instr, nir_instr_type_alu);
434 instr->op = op;
435 alu_dest_init(&instr->dest);
436 for (unsigned i = 0; i < num_srcs; i++)
437 alu_src_init(&instr->src[i]);
438
439 return instr;
440 }
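/* Illustrative sketch (not from the original file): hand-building a scalar
 * fadd and inserting it after an existing instruction.  "a", "b" and
 * "prev_instr" are hypothetical and assumed to already exist; most passes
 * would use the nir_builder helpers instead of open-coding this.
 *
 *    nir_alu_instr *add = nir_alu_instr_create(shader, nir_op_fadd);
 *    add->src[0].src = nir_src_for_ssa(a);
 *    add->src[1].src = nir_src_for_ssa(b);
 *    nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, a->bit_size, NULL);
 *    add->dest.write_mask = 0x1;
 *    nir_instr_insert(nir_after_instr(prev_instr), &add->instr);
 */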
441
442 nir_deref_instr *
443 nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
444 {
445 nir_deref_instr *instr =
446 rzalloc_size(shader, sizeof(nir_deref_instr));
447
448 instr_init(&instr->instr, nir_instr_type_deref);
449
450 instr->deref_type = deref_type;
451 if (deref_type != nir_deref_type_var)
452 src_init(&instr->parent);
453
454 if (deref_type == nir_deref_type_array ||
455 deref_type == nir_deref_type_ptr_as_array)
456 src_init(&instr->arr.index);
457
458 dest_init(&instr->dest);
459
460 return instr;
461 }
462
463 nir_jump_instr *
464 nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
465 {
466 nir_jump_instr *instr = ralloc(shader, nir_jump_instr);
467 instr_init(&instr->instr, nir_instr_type_jump);
468 instr->type = type;
469 return instr;
470 }
471
472 nir_load_const_instr *
473 nir_load_const_instr_create(nir_shader *shader, unsigned num_components,
474 unsigned bit_size)
475 {
476 nir_load_const_instr *instr =
477 rzalloc_size(shader, sizeof(*instr) + num_components * sizeof(*instr->value));
478 instr_init(&instr->instr, nir_instr_type_load_const);
479
480 nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size, NULL);
481
482 return instr;
483 }
484
485 nir_intrinsic_instr *
486 nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
487 {
488 unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
489 /* TODO: don't use rzalloc */
490 nir_intrinsic_instr *instr =
491 rzalloc_size(shader,
492 sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));
493
494 instr_init(&instr->instr, nir_instr_type_intrinsic);
495 instr->intrinsic = op;
496
497 if (nir_intrinsic_infos[op].has_dest)
498 dest_init(&instr->dest);
499
500 for (unsigned i = 0; i < num_srcs; i++)
501 src_init(&instr->src[i]);
502
503 return instr;
504 }
505
506 nir_call_instr *
507 nir_call_instr_create(nir_shader *shader, nir_function *callee)
508 {
509 const unsigned num_params = callee->num_params;
510 nir_call_instr *instr =
511 rzalloc_size(shader, sizeof(*instr) +
512 num_params * sizeof(instr->params[0]));
513
514 instr_init(&instr->instr, nir_instr_type_call);
515 instr->callee = callee;
516 instr->num_params = num_params;
517 for (unsigned i = 0; i < num_params; i++)
518 src_init(&instr->params[i]);
519
520 return instr;
521 }
522
523 static int8_t default_tg4_offsets[4][2] =
524 {
525 { 0, 1 },
526 { 1, 1 },
527 { 1, 0 },
528 { 0, 0 },
529 };
530
531 nir_tex_instr *
532 nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
533 {
534 nir_tex_instr *instr = rzalloc(shader, nir_tex_instr);
535 instr_init(&instr->instr, nir_instr_type_tex);
536
537 dest_init(&instr->dest);
538
539 instr->num_srcs = num_srcs;
540 instr->src = ralloc_array(instr, nir_tex_src, num_srcs);
541 for (unsigned i = 0; i < num_srcs; i++)
542 src_init(&instr->src[i].src);
543
544 instr->texture_index = 0;
545 instr->texture_array_size = 0;
546 instr->sampler_index = 0;
547 memcpy(instr->tg4_offsets, default_tg4_offsets, sizeof(instr->tg4_offsets));
548
549 return instr;
550 }
551
552 void
553 nir_tex_instr_add_src(nir_tex_instr *tex,
554 nir_tex_src_type src_type,
555 nir_src src)
556 {
557 nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src,
558 tex->num_srcs + 1);
559
560 for (unsigned i = 0; i < tex->num_srcs; i++) {
561 new_srcs[i].src_type = tex->src[i].src_type;
562 nir_instr_move_src(&tex->instr, &new_srcs[i].src,
563 &tex->src[i].src);
564 }
565
566 ralloc_free(tex->src);
567 tex->src = new_srcs;
568
569 tex->src[tex->num_srcs].src_type = src_type;
570 nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs].src, src);
571 tex->num_srcs++;
572 }
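/* Illustrative sketch (not from the original file): attaching an explicit LOD
 * source to an existing texture instruction, where "tex" and "lod_def" are
 * hypothetical and assumed to already exist:
 *
 *    nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod_def));
 */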
573
574 void
575 nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx)
576 {
577 assert(src_idx < tex->num_srcs);
578
579 /* First rewrite the source to NIR_SRC_INIT */
580 nir_instr_rewrite_src(&tex->instr, &tex->src[src_idx].src, NIR_SRC_INIT);
581
582 /* Now, move all of the other sources down */
583 for (unsigned i = src_idx + 1; i < tex->num_srcs; i++) {
584 tex->src[i-1].src_type = tex->src[i].src_type;
585 nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src);
586 }
587 tex->num_srcs--;
588 }
589
590 bool
591 nir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex)
592 {
593 if (tex->op != nir_texop_tg4)
594 return false;
595 return memcmp(tex->tg4_offsets, default_tg4_offsets,
596 sizeof(tex->tg4_offsets)) != 0;
597 }
598
599 nir_phi_instr *
600 nir_phi_instr_create(nir_shader *shader)
601 {
602 nir_phi_instr *instr = ralloc(shader, nir_phi_instr);
603 instr_init(&instr->instr, nir_instr_type_phi);
604
605 dest_init(&instr->dest);
606 exec_list_make_empty(&instr->srcs);
607 return instr;
608 }
609
610 nir_parallel_copy_instr *
611 nir_parallel_copy_instr_create(nir_shader *shader)
612 {
613 nir_parallel_copy_instr *instr = ralloc(shader, nir_parallel_copy_instr);
614 instr_init(&instr->instr, nir_instr_type_parallel_copy);
615
616 exec_list_make_empty(&instr->entries);
617
618 return instr;
619 }
620
621 nir_ssa_undef_instr *
622 nir_ssa_undef_instr_create(nir_shader *shader,
623 unsigned num_components,
624 unsigned bit_size)
625 {
626 nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr);
627 instr_init(&instr->instr, nir_instr_type_ssa_undef);
628
629 nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size, NULL);
630
631 return instr;
632 }
633
634 static nir_const_value
635 const_value_float(double d, unsigned bit_size)
636 {
637 nir_const_value v;
638 memset(&v, 0, sizeof(v));
639 switch (bit_size) {
640 case 16: v.u16 = _mesa_float_to_half(d); break;
641 case 32: v.f32 = d; break;
642 case 64: v.f64 = d; break;
643 default:
644 unreachable("Invalid bit size");
645 }
646 return v;
647 }
648
649 static nir_const_value
650 const_value_int(int64_t i, unsigned bit_size)
651 {
652 nir_const_value v;
653 memset(&v, 0, sizeof(v));
654 switch (bit_size) {
655 case 1: v.b = i & 1; break;
656 case 8: v.i8 = i; break;
657 case 16: v.i16 = i; break;
658 case 32: v.i32 = i; break;
659 case 64: v.i64 = i; break;
660 default:
661 unreachable("Invalid bit size");
662 }
663 return v;
664 }
665
666 nir_const_value
667 nir_alu_binop_identity(nir_op binop, unsigned bit_size)
668 {
669 const int64_t max_int = (1ull << (bit_size - 1)) - 1;
670 const int64_t min_int = -max_int - 1;
671 switch (binop) {
672 case nir_op_iadd:
673 return const_value_int(0, bit_size);
674 case nir_op_fadd:
675 return const_value_float(0, bit_size);
676 case nir_op_imul:
677 return const_value_int(1, bit_size);
678 case nir_op_fmul:
679 return const_value_float(1, bit_size);
680 case nir_op_imin:
681 return const_value_int(max_int, bit_size);
682 case nir_op_umin:
683 return const_value_int(~0ull, bit_size);
684 case nir_op_fmin:
685 return const_value_float(INFINITY, bit_size);
686 case nir_op_imax:
687 return const_value_int(min_int, bit_size);
688 case nir_op_umax:
689 return const_value_int(0, bit_size);
690 case nir_op_fmax:
691 return const_value_float(-INFINITY, bit_size);
692 case nir_op_iand:
693 return const_value_int(~0ull, bit_size);
694 case nir_op_ior:
695 return const_value_int(0, bit_size);
696 case nir_op_ixor:
697 return const_value_int(0, bit_size);
698 default:
699 unreachable("Invalid reduction operation");
700 }
701 }
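/* Illustrative example (not from the original file): the identity is the
 * value that leaves the reduction unchanged, so lowering passes can use it to
 * fill inactive channels or to seed a scan, e.g.
 *
 *    nir_const_value id = nir_alu_binop_identity(nir_op_fmax, 32);
 *    assert(id.f32 == -INFINITY);
 */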
702
703 nir_function_impl *
704 nir_cf_node_get_function(nir_cf_node *node)
705 {
706 while (node->type != nir_cf_node_function) {
707 node = node->parent;
708 }
709
710 return nir_cf_node_as_function(node);
711 }
712
713 /* Reduces a cursor by trying to convert everything to an 'after' form and to
714 * go up to block granularity when possible.
715 */
716 static nir_cursor
717 reduce_cursor(nir_cursor cursor)
718 {
719 switch (cursor.option) {
720 case nir_cursor_before_block:
721 assert(nir_cf_node_prev(&cursor.block->cf_node) == NULL ||
722 nir_cf_node_prev(&cursor.block->cf_node)->type != nir_cf_node_block);
723 if (exec_list_is_empty(&cursor.block->instr_list)) {
724 /* Empty block. After is as good as before. */
725 cursor.option = nir_cursor_after_block;
726 }
727 return cursor;
728
729 case nir_cursor_after_block:
730 return cursor;
731
732 case nir_cursor_before_instr: {
733 nir_instr *prev_instr = nir_instr_prev(cursor.instr);
734 if (prev_instr) {
735 /* Before this instruction is after the previous */
736 cursor.instr = prev_instr;
737 cursor.option = nir_cursor_after_instr;
738 } else {
739 /* No previous instruction. Switch to before block */
740 cursor.block = cursor.instr->block;
741 cursor.option = nir_cursor_before_block;
742 }
743 return reduce_cursor(cursor);
744 }
745
746 case nir_cursor_after_instr:
747 if (nir_instr_next(cursor.instr) == NULL) {
748 /* This is the last instruction, switch to after block */
749 cursor.option = nir_cursor_after_block;
750 cursor.block = cursor.instr->block;
751 }
752 return cursor;
753
754 default:
755 unreachable("Invalid cursor option");
756 }
757 }
758
759 bool
760 nir_cursors_equal(nir_cursor a, nir_cursor b)
761 {
762 /* Reduced cursors should be unique */
763 a = reduce_cursor(a);
764 b = reduce_cursor(b);
765
766 return a.block == b.block && a.option == b.option;
767 }
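/* Illustrative example (not from the original file): two cursors denoting the
 * same insertion point compare equal after reduction.  For an instruction
 * "instr" that has a preceding instruction in the same block:
 *
 *    nir_cursor a = nir_before_instr(instr);
 *    nir_cursor b = nir_after_instr(nir_instr_prev(instr));
 *    assert(nir_cursors_equal(a, b));
 *
 * nir_before_instr()/nir_after_instr() are the cursor helpers from nir.h.
 */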
768
769 static bool
770 add_use_cb(nir_src *src, void *state)
771 {
772 nir_instr *instr = state;
773
774 src->parent_instr = instr;
775 list_addtail(&src->use_link,
776 src->is_ssa ? &src->ssa->uses : &src->reg.reg->uses);
777
778 return true;
779 }
780
781 static bool
782 add_ssa_def_cb(nir_ssa_def *def, void *state)
783 {
784 nir_instr *instr = state;
785
786 if (instr->block && def->index == UINT_MAX) {
787 nir_function_impl *impl =
788 nir_cf_node_get_function(&instr->block->cf_node);
789
790 def->index = impl->ssa_alloc++;
791 }
792
793 return true;
794 }
795
796 static bool
797 add_reg_def_cb(nir_dest *dest, void *state)
798 {
799 nir_instr *instr = state;
800
801 if (!dest->is_ssa) {
802 dest->reg.parent_instr = instr;
803 list_addtail(&dest->reg.def_link, &dest->reg.reg->defs);
804 }
805
806 return true;
807 }
808
809 static void
810 add_defs_uses(nir_instr *instr)
811 {
812 nir_foreach_src(instr, add_use_cb, instr);
813 nir_foreach_dest(instr, add_reg_def_cb, instr);
814 nir_foreach_ssa_def(instr, add_ssa_def_cb, instr);
815 }
816
817 void
818 nir_instr_insert(nir_cursor cursor, nir_instr *instr)
819 {
820 switch (cursor.option) {
821 case nir_cursor_before_block:
822 /* Only allow inserting jumps into empty blocks. */
823 if (instr->type == nir_instr_type_jump)
824 assert(exec_list_is_empty(&cursor.block->instr_list));
825
826 instr->block = cursor.block;
827 add_defs_uses(instr);
828 exec_list_push_head(&cursor.block->instr_list, &instr->node);
829 break;
830 case nir_cursor_after_block: {
831 /* Inserting instructions after a jump is illegal. */
832 nir_instr *last = nir_block_last_instr(cursor.block);
833 assert(last == NULL || last->type != nir_instr_type_jump);
834 (void) last;
835
836 instr->block = cursor.block;
837 add_defs_uses(instr);
838 exec_list_push_tail(&cursor.block->instr_list, &instr->node);
839 break;
840 }
841 case nir_cursor_before_instr:
842 assert(instr->type != nir_instr_type_jump);
843 instr->block = cursor.instr->block;
844 add_defs_uses(instr);
845 exec_node_insert_node_before(&cursor.instr->node, &instr->node);
846 break;
847 case nir_cursor_after_instr:
848 /* Inserting instructions after a jump is illegal. */
849 assert(cursor.instr->type != nir_instr_type_jump);
850
851 /* Only allow inserting jumps at the end of the block. */
852 if (instr->type == nir_instr_type_jump)
853 assert(cursor.instr == nir_block_last_instr(cursor.instr->block));
854
855 instr->block = cursor.instr->block;
856 add_defs_uses(instr);
857 exec_node_insert_after(&cursor.instr->node, &instr->node);
858 break;
859 }
860
861 if (instr->type == nir_instr_type_jump)
862 nir_handle_add_jump(instr->block);
863 }
864
865 static bool
866 src_is_valid(const nir_src *src)
867 {
868 return src->is_ssa ? (src->ssa != NULL) : (src->reg.reg != NULL);
869 }
870
871 static bool
872 remove_use_cb(nir_src *src, void *state)
873 {
874 (void) state;
875
876 if (src_is_valid(src))
877 list_del(&src->use_link);
878
879 return true;
880 }
881
882 static bool
883 remove_def_cb(nir_dest *dest, void *state)
884 {
885 (void) state;
886
887 if (!dest->is_ssa)
888 list_del(&dest->reg.def_link);
889
890 return true;
891 }
892
893 static void
894 remove_defs_uses(nir_instr *instr)
895 {
896 nir_foreach_dest(instr, remove_def_cb, instr);
897 nir_foreach_src(instr, remove_use_cb, instr);
898 }
899
900 void nir_instr_remove_v(nir_instr *instr)
901 {
902 remove_defs_uses(instr);
903 exec_node_remove(&instr->node);
904
905 if (instr->type == nir_instr_type_jump) {
906 nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
907 nir_handle_remove_jump(instr->block, jump_instr->type);
908 }
909 }
910
911 /*@}*/
912
913 void
914 nir_index_local_regs(nir_function_impl *impl)
915 {
916 unsigned index = 0;
917 foreach_list_typed(nir_register, reg, node, &impl->registers) {
918 reg->index = index++;
919 }
920 impl->reg_alloc = index;
921 }
922
923 static bool
924 visit_alu_dest(nir_alu_instr *instr, nir_foreach_dest_cb cb, void *state)
925 {
926 return cb(&instr->dest.dest, state);
927 }
928
929 static bool
930 visit_deref_dest(nir_deref_instr *instr, nir_foreach_dest_cb cb, void *state)
931 {
932 return cb(&instr->dest, state);
933 }
934
935 static bool
936 visit_intrinsic_dest(nir_intrinsic_instr *instr, nir_foreach_dest_cb cb,
937 void *state)
938 {
939 if (nir_intrinsic_infos[instr->intrinsic].has_dest)
940 return cb(&instr->dest, state);
941
942 return true;
943 }
944
945 static bool
946 visit_texture_dest(nir_tex_instr *instr, nir_foreach_dest_cb cb,
947 void *state)
948 {
949 return cb(&instr->dest, state);
950 }
951
952 static bool
953 visit_phi_dest(nir_phi_instr *instr, nir_foreach_dest_cb cb, void *state)
954 {
955 return cb(&instr->dest, state);
956 }
957
958 static bool
959 visit_parallel_copy_dest(nir_parallel_copy_instr *instr,
960 nir_foreach_dest_cb cb, void *state)
961 {
962 nir_foreach_parallel_copy_entry(entry, instr) {
963 if (!cb(&entry->dest, state))
964 return false;
965 }
966
967 return true;
968 }
969
970 bool
971 nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state)
972 {
973 switch (instr->type) {
974 case nir_instr_type_alu:
975 return visit_alu_dest(nir_instr_as_alu(instr), cb, state);
976 case nir_instr_type_deref:
977 return visit_deref_dest(nir_instr_as_deref(instr), cb, state);
978 case nir_instr_type_intrinsic:
979 return visit_intrinsic_dest(nir_instr_as_intrinsic(instr), cb, state);
980 case nir_instr_type_tex:
981 return visit_texture_dest(nir_instr_as_tex(instr), cb, state);
982 case nir_instr_type_phi:
983 return visit_phi_dest(nir_instr_as_phi(instr), cb, state);
984 case nir_instr_type_parallel_copy:
985 return visit_parallel_copy_dest(nir_instr_as_parallel_copy(instr),
986 cb, state);
987
988 case nir_instr_type_load_const:
989 case nir_instr_type_ssa_undef:
990 case nir_instr_type_call:
991 case nir_instr_type_jump:
992 break;
993
994 default:
995 unreachable("Invalid instruction type");
996 break;
997 }
998
999 return true;
1000 }
1001
1002 struct foreach_ssa_def_state {
1003 nir_foreach_ssa_def_cb cb;
1004 void *client_state;
1005 };
1006
1007 static inline bool
1008 nir_ssa_def_visitor(nir_dest *dest, void *void_state)
1009 {
1010 struct foreach_ssa_def_state *state = void_state;
1011
1012 if (dest->is_ssa)
1013 return state->cb(&dest->ssa, state->client_state);
1014 else
1015 return true;
1016 }
1017
1018 bool
1019 nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state)
1020 {
1021 switch (instr->type) {
1022 case nir_instr_type_alu:
1023 case nir_instr_type_deref:
1024 case nir_instr_type_tex:
1025 case nir_instr_type_intrinsic:
1026 case nir_instr_type_phi:
1027 case nir_instr_type_parallel_copy: {
1028 struct foreach_ssa_def_state foreach_state = {cb, state};
1029 return nir_foreach_dest(instr, nir_ssa_def_visitor, &foreach_state);
1030 }
1031
1032 case nir_instr_type_load_const:
1033 return cb(&nir_instr_as_load_const(instr)->def, state);
1034 case nir_instr_type_ssa_undef:
1035 return cb(&nir_instr_as_ssa_undef(instr)->def, state);
1036 case nir_instr_type_call:
1037 case nir_instr_type_jump:
1038 return true;
1039 default:
1040 unreachable("Invalid instruction type");
1041 }
1042 }
1043
1044 static bool
1045 visit_src(nir_src *src, nir_foreach_src_cb cb, void *state)
1046 {
1047 if (!cb(src, state))
1048 return false;
1049 if (!src->is_ssa && src->reg.indirect)
1050 return cb(src->reg.indirect, state);
1051 return true;
1052 }
1053
1054 static bool
1055 visit_alu_src(nir_alu_instr *instr, nir_foreach_src_cb cb, void *state)
1056 {
1057 for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
1058 if (!visit_src(&instr->src[i].src, cb, state))
1059 return false;
1060
1061 return true;
1062 }
1063
1064 static bool
1065 visit_deref_instr_src(nir_deref_instr *instr,
1066 nir_foreach_src_cb cb, void *state)
1067 {
1068 if (instr->deref_type != nir_deref_type_var) {
1069 if (!visit_src(&instr->parent, cb, state))
1070 return false;
1071 }
1072
1073 if (instr->deref_type == nir_deref_type_array ||
1074 instr->deref_type == nir_deref_type_ptr_as_array) {
1075 if (!visit_src(&instr->arr.index, cb, state))
1076 return false;
1077 }
1078
1079 return true;
1080 }
1081
1082 static bool
1083 visit_tex_src(nir_tex_instr *instr, nir_foreach_src_cb cb, void *state)
1084 {
1085 for (unsigned i = 0; i < instr->num_srcs; i++) {
1086 if (!visit_src(&instr->src[i].src, cb, state))
1087 return false;
1088 }
1089
1090 return true;
1091 }
1092
1093 static bool
1094 visit_intrinsic_src(nir_intrinsic_instr *instr, nir_foreach_src_cb cb,
1095 void *state)
1096 {
1097 unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
1098 for (unsigned i = 0; i < num_srcs; i++) {
1099 if (!visit_src(&instr->src[i], cb, state))
1100 return false;
1101 }
1102
1103 return true;
1104 }
1105
1106 static bool
1107 visit_call_src(nir_call_instr *instr, nir_foreach_src_cb cb, void *state)
1108 {
1109 for (unsigned i = 0; i < instr->num_params; i++) {
1110 if (!visit_src(&instr->params[i], cb, state))
1111 return false;
1112 }
1113
1114 return true;
1115 }
1116
1117 static bool
1118 visit_phi_src(nir_phi_instr *instr, nir_foreach_src_cb cb, void *state)
1119 {
1120 nir_foreach_phi_src(src, instr) {
1121 if (!visit_src(&src->src, cb, state))
1122 return false;
1123 }
1124
1125 return true;
1126 }
1127
1128 static bool
1129 visit_parallel_copy_src(nir_parallel_copy_instr *instr,
1130 nir_foreach_src_cb cb, void *state)
1131 {
1132 nir_foreach_parallel_copy_entry(entry, instr) {
1133 if (!visit_src(&entry->src, cb, state))
1134 return false;
1135 }
1136
1137 return true;
1138 }
1139
1140 typedef struct {
1141 void *state;
1142 nir_foreach_src_cb cb;
1143 } visit_dest_indirect_state;
1144
1145 static bool
1146 visit_dest_indirect(nir_dest *dest, void *_state)
1147 {
1148 visit_dest_indirect_state *state = (visit_dest_indirect_state *) _state;
1149
1150 if (!dest->is_ssa && dest->reg.indirect)
1151 return state->cb(dest->reg.indirect, state->state);
1152
1153 return true;
1154 }
1155
1156 bool
1157 nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state)
1158 {
1159 switch (instr->type) {
1160 case nir_instr_type_alu:
1161 if (!visit_alu_src(nir_instr_as_alu(instr), cb, state))
1162 return false;
1163 break;
1164 case nir_instr_type_deref:
1165 if (!visit_deref_instr_src(nir_instr_as_deref(instr), cb, state))
1166 return false;
1167 break;
1168 case nir_instr_type_intrinsic:
1169 if (!visit_intrinsic_src(nir_instr_as_intrinsic(instr), cb, state))
1170 return false;
1171 break;
1172 case nir_instr_type_tex:
1173 if (!visit_tex_src(nir_instr_as_tex(instr), cb, state))
1174 return false;
1175 break;
1176 case nir_instr_type_call:
1177 if (!visit_call_src(nir_instr_as_call(instr), cb, state))
1178 return false;
1179 break;
1180 case nir_instr_type_load_const:
1181 /* Constant load instructions have no regular sources */
1182 break;
1183 case nir_instr_type_phi:
1184 if (!visit_phi_src(nir_instr_as_phi(instr), cb, state))
1185 return false;
1186 break;
1187 case nir_instr_type_parallel_copy:
1188 if (!visit_parallel_copy_src(nir_instr_as_parallel_copy(instr),
1189 cb, state))
1190 return false;
1191 break;
1192 case nir_instr_type_jump:
1193 case nir_instr_type_ssa_undef:
1194 return true;
1195
1196 default:
1197 unreachable("Invalid instruction type");
1198 break;
1199 }
1200
1201 visit_dest_indirect_state dest_state;
1202 dest_state.state = state;
1203 dest_state.cb = cb;
1204 return nir_foreach_dest(instr, visit_dest_indirect, &dest_state);
1205 }
1206
1207 int64_t
1208 nir_src_comp_as_int(nir_src src, unsigned comp)
1209 {
1210 assert(nir_src_is_const(src));
1211 nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);
1212
1213 assert(comp < load->def.num_components);
1214 switch (load->def.bit_size) {
1215 /* int1_t uses 0/-1 convention */
1216 case 1: return -(int)load->value[comp].b;
1217 case 8: return load->value[comp].i8;
1218 case 16: return load->value[comp].i16;
1219 case 32: return load->value[comp].i32;
1220 case 64: return load->value[comp].i64;
1221 default:
1222 unreachable("Invalid bit size");
1223 }
1224 }
1225
1226 uint64_t
1227 nir_src_comp_as_uint(nir_src src, unsigned comp)
1228 {
1229 assert(nir_src_is_const(src));
1230 nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);
1231
1232 assert(comp < load->def.num_components);
1233 switch (load->def.bit_size) {
1234 case 1: return load->value[comp].b;
1235 case 8: return load->value[comp].u8;
1236 case 16: return load->value[comp].u16;
1237 case 32: return load->value[comp].u32;
1238 case 64: return load->value[comp].u64;
1239 default:
1240 unreachable("Invalid bit size");
1241 }
1242 }
1243
1244 bool
1245 nir_src_comp_as_bool(nir_src src, unsigned comp)
1246 {
1247 int64_t i = nir_src_comp_as_int(src, comp);
1248
1249 /* Booleans of any size use 0/-1 convention */
1250 assert(i == 0 || i == -1);
1251
1252 return i;
1253 }
1254
1255 double
1256 nir_src_comp_as_float(nir_src src, unsigned comp)
1257 {
1258 assert(nir_src_is_const(src));
1259 nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);
1260
1261 assert(comp < load->def.num_components);
1262 switch (load->def.bit_size) {
1263 case 16: return _mesa_half_to_float(load->value[comp].u16);
1264 case 32: return load->value[comp].f32;
1265 case 64: return load->value[comp].f64;
1266 default:
1267 unreachable("Invalid bit size");
1268 }
1269 }
1270
1271 int64_t
1272 nir_src_as_int(nir_src src)
1273 {
1274 assert(nir_src_num_components(src) == 1);
1275 return nir_src_comp_as_int(src, 0);
1276 }
1277
1278 uint64_t
1279 nir_src_as_uint(nir_src src)
1280 {
1281 assert(nir_src_num_components(src) == 1);
1282 return nir_src_comp_as_uint(src, 0);
1283 }
1284
1285 bool
1286 nir_src_as_bool(nir_src src)
1287 {
1288 assert(nir_src_num_components(src) == 1);
1289 return nir_src_comp_as_bool(src, 0);
1290 }
1291
1292 double
1293 nir_src_as_float(nir_src src)
1294 {
1295 assert(nir_src_num_components(src) == 1);
1296 return nir_src_comp_as_float(src, 0);
1297 }
1298
1299 nir_const_value *
1300 nir_src_as_const_value(nir_src src)
1301 {
1302 if (!src.is_ssa)
1303 return NULL;
1304
1305 if (src.ssa->parent_instr->type != nir_instr_type_load_const)
1306 return NULL;
1307
1308 nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);
1309
1310 return load->value;
1311 }
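/* Illustrative sketch (not from the original file): the typical constant
 * check in an optimization pass, for a hypothetical intrinsic "intrin" whose
 * second source may or may not be an immediate:
 *
 *    if (nir_src_is_const(intrin->src[1])) {
 *       uint64_t offset = nir_src_as_uint(intrin->src[1]);
 *       ...
 *    }
 */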
1312
1313 /**
1314 * Returns true if the source is known to be dynamically uniform. Otherwise it
1315 * returns false, which means the source may or may not be dynamically uniform
1316 * but this cannot be determined.
1317 */
1318 bool
1319 nir_src_is_dynamically_uniform(nir_src src)
1320 {
1321 if (!src.is_ssa)
1322 return false;
1323
1324 /* Constants are trivially dynamically uniform */
1325 if (src.ssa->parent_instr->type == nir_instr_type_load_const)
1326 return true;
1327
1328 /* As are uniform variables */
1329 if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) {
1330 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr);
1331
1332 if (intr->intrinsic == nir_intrinsic_load_uniform)
1333 return true;
1334 }
1335
1336 /* Combining dynamically uniform expressions produces a
1337 * dynamically uniform result
1338 */
1339 if (src.ssa->parent_instr->type == nir_instr_type_alu) {
1340 nir_alu_instr *alu = nir_instr_as_alu(src.ssa->parent_instr);
1341 for (int i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
1342 if (!nir_src_is_dynamically_uniform(alu->src[i].src))
1343 return false;
1344 }
1345
1346 return true;
1347 }
1348
1349 /* XXX: this could have many more tests, such as when a sampler function is
1350 * called with dynamically uniform arguments.
1351 */
1352 return false;
1353 }
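/* Illustrative example (not from the original file): thanks to the ALU case
 * above, a "uniform value plus constant" expression, roughly
 *
 *    vec1 32 ssa_0 = load_const (0x00000004)
 *    vec1 32 ssa_1 = intrinsic load_uniform (ssa_0) (...)
 *    vec1 32 ssa_2 = iadd ssa_1, ssa_0
 *
 * is now detected as dynamically uniform, because both operands of the iadd
 * are themselves dynamically uniform.
 */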
1354
1355 static void
1356 src_remove_all_uses(nir_src *src)
1357 {
1358 for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
1359 if (!src_is_valid(src))
1360 continue;
1361
1362 list_del(&src->use_link);
1363 }
1364 }
1365
1366 static void
1367 src_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
1368 {
1369 for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
1370 if (!src_is_valid(src))
1371 continue;
1372
1373 if (parent_instr) {
1374 src->parent_instr = parent_instr;
1375 if (src->is_ssa)
1376 list_addtail(&src->use_link, &src->ssa->uses);
1377 else
1378 list_addtail(&src->use_link, &src->reg.reg->uses);
1379 } else {
1380 assert(parent_if);
1381 src->parent_if = parent_if;
1382 if (src->is_ssa)
1383 list_addtail(&src->use_link, &src->ssa->if_uses);
1384 else
1385 list_addtail(&src->use_link, &src->reg.reg->if_uses);
1386 }
1387 }
1388 }
1389
1390 void
1391 nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src)
1392 {
1393 assert(!src_is_valid(src) || src->parent_instr == instr);
1394
1395 src_remove_all_uses(src);
1396 *src = new_src;
1397 src_add_all_uses(src, instr, NULL);
1398 }
1399
1400 void
1401 nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src)
1402 {
1403 assert(!src_is_valid(dest) || dest->parent_instr == dest_instr);
1404
1405 src_remove_all_uses(dest);
1406 src_remove_all_uses(src);
1407 *dest = *src;
1408 *src = NIR_SRC_INIT;
1409 src_add_all_uses(dest, dest_instr, NULL);
1410 }
1411
1412 void
1413 nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src)
1414 {
1415 nir_src *src = &if_stmt->condition;
1416 assert(!src_is_valid(src) || src->parent_if == if_stmt);
1417
1418 src_remove_all_uses(src);
1419 *src = new_src;
1420 src_add_all_uses(src, NULL, if_stmt);
1421 }
1422
1423 void
1424 nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest)
1425 {
1426 if (dest->is_ssa) {
1427 /* We can only overwrite an SSA destination if it has no uses. */
1428 assert(list_empty(&dest->ssa.uses) && list_empty(&dest->ssa.if_uses));
1429 } else {
1430 list_del(&dest->reg.def_link);
1431 if (dest->reg.indirect)
1432 src_remove_all_uses(dest->reg.indirect);
1433 }
1434
1435 /* We can't re-write with an SSA def */
1436 assert(!new_dest.is_ssa);
1437
1438 nir_dest_copy(dest, &new_dest, instr);
1439
1440 dest->reg.parent_instr = instr;
1441 list_addtail(&dest->reg.def_link, &new_dest.reg.reg->defs);
1442
1443 if (dest->reg.indirect)
1444 src_add_all_uses(dest->reg.indirect, instr, NULL);
1445 }
1446
1447 /* note: does *not* take ownership of 'name' */
1448 void
1449 nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
1450 unsigned num_components,
1451 unsigned bit_size, const char *name)
1452 {
1453 def->name = ralloc_strdup(instr, name);
1454 def->parent_instr = instr;
1455 list_inithead(&def->uses);
1456 list_inithead(&def->if_uses);
1457 def->num_components = num_components;
1458 def->bit_size = bit_size;
1459
1460 if (instr->block) {
1461 nir_function_impl *impl =
1462 nir_cf_node_get_function(&instr->block->cf_node);
1463
1464 def->index = impl->ssa_alloc++;
1465 } else {
1466 def->index = UINT_MAX;
1467 }
1468 }
1469
1470 /* note: does *not* take ownership of 'name' */
1471 void
1472 nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
1473 unsigned num_components, unsigned bit_size,
1474 const char *name)
1475 {
1476 dest->is_ssa = true;
1477 nir_ssa_def_init(instr, &dest->ssa, num_components, bit_size, name);
1478 }
1479
1480 void
1481 nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src)
1482 {
1483 assert(!new_src.is_ssa || def != new_src.ssa);
1484
1485 nir_foreach_use_safe(use_src, def)
1486 nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
1487
1488 nir_foreach_if_use_safe(use_src, def)
1489 nir_if_rewrite_condition(use_src->parent_if, new_src);
1490 }
1491
1492 static bool
1493 is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between)
1494 {
1495 assert(start->block == end->block);
1496
1497 if (between->block != start->block)
1498 return false;
1499
1500 /* Search backwards looking for "between" */
1501 while (start != end) {
1502 if (between == end)
1503 return true;
1504
1505 end = nir_instr_prev(end);
1506 assert(end);
1507 }
1508
1509 return false;
1510 }
1511
1512 /* Replaces all uses of the given SSA def with the given source but only if
1513 * the use comes after the after_me instruction. This can be useful if you
1514 * are emitting code to fix up the result of some instruction: you can freely
1515 * use the result in that code and then call rewrite_uses_after and pass the
1516 * last fixup instruction as after_me and it will replace all of the uses you
1517 * want without touching the fixup code.
1518 *
1519 * This function assumes that after_me is in the same block as
1520 * def->parent_instr and that after_me comes after def->parent_instr.
1521 */
1522 void
1523 nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src,
1524 nir_instr *after_me)
1525 {
1526 if (new_src.is_ssa && def == new_src.ssa)
1527 return;
1528
1529 nir_foreach_use_safe(use_src, def) {
1530 assert(use_src->parent_instr != def->parent_instr);
1531 /* Since def already dominates all of its uses, the only way a use can
1532 * not be dominated by after_me is if it is between def and after_me in
1533 * the instruction list.
1534 */
1535 if (!is_instr_between(def->parent_instr, after_me, use_src->parent_instr))
1536 nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
1537 }
1538
1539 nir_foreach_if_use_safe(use_src, def)
1540 nir_if_rewrite_condition(use_src->parent_if, new_src);
1541 }
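/* Illustrative sketch (not from the original file) of the fix-up pattern
 * described above: clamp the result of an existing instruction and make all
 * later users see the clamped value.  "b" is a hypothetical nir_builder
 * positioned right after def's parent instruction.
 *
 *    nir_ssa_def *clamped = nir_fsat(b, def);
 *    nir_ssa_def_rewrite_uses_after(def, nir_src_for_ssa(clamped),
 *                                   clamped->parent_instr);
 *
 * The fsat still reads the original def; every use after it is rewritten.
 */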
1542
1543 nir_component_mask_t
1544 nir_ssa_def_components_read(const nir_ssa_def *def)
1545 {
1546 nir_component_mask_t read_mask = 0;
1547 nir_foreach_use(use, def) {
1548 if (use->parent_instr->type == nir_instr_type_alu) {
1549 nir_alu_instr *alu = nir_instr_as_alu(use->parent_instr);
1550 nir_alu_src *alu_src = exec_node_data(nir_alu_src, use, src);
1551 int src_idx = alu_src - &alu->src[0];
1552 assert(src_idx >= 0 && src_idx < nir_op_infos[alu->op].num_inputs);
1553 read_mask |= nir_alu_instr_src_read_mask(alu, src_idx);
1554 } else {
1555 return (1 << def->num_components) - 1;
1556 }
1557 }
1558
1559 if (!list_empty(&def->if_uses))
1560 read_mask |= 1;
1561
1562 return read_mask;
1563 }
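/* Illustrative example (not from the original file): if a vec4 def's only use
 * is an ALU source swizzled as .xxzz, only components x and z are read, so
 * nir_ssa_def_components_read() returns 0b0101.
 */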
1564
1565 nir_block *
1566 nir_block_cf_tree_next(nir_block *block)
1567 {
1568 if (block == NULL) {
1569 /* nir_foreach_block_safe() will call this function on a NULL block
1570 * after the last iteration, but it won't use the result so just return
1571 * NULL here.
1572 */
1573 return NULL;
1574 }
1575
1576 nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
1577 if (cf_next)
1578 return nir_cf_node_cf_tree_first(cf_next);
1579
1580 nir_cf_node *parent = block->cf_node.parent;
1581
1582 switch (parent->type) {
1583 case nir_cf_node_if: {
1584 /* Are we at the end of the if? Go to the beginning of the else */
1585 nir_if *if_stmt = nir_cf_node_as_if(parent);
1586 if (block == nir_if_last_then_block(if_stmt))
1587 return nir_if_first_else_block(if_stmt);
1588
1589 assert(block == nir_if_last_else_block(if_stmt));
1590 /* fall through */
1591 }
1592
1593 case nir_cf_node_loop:
1594 return nir_cf_node_as_block(nir_cf_node_next(parent));
1595
1596 case nir_cf_node_function:
1597 return NULL;
1598
1599 default:
1600 unreachable("unknown cf node type");
1601 }
1602 }
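/* Illustrative sketch (not from the original file): this function is what the
 * forward block-iteration macros are built on, e.g. a walk over a function
 * can be written as
 *
 *    for (nir_block *block = nir_start_block(impl); block != NULL;
 *         block = nir_block_cf_tree_next(block)) {
 *       ...
 *    }
 *
 * which visits blocks in the same order as nir_foreach_block().
 */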
1603
1604 nir_block *
1605 nir_block_cf_tree_prev(nir_block *block)
1606 {
1607 if (block == NULL) {
1608 /* do this for consistency with nir_block_cf_tree_next() */
1609 return NULL;
1610 }
1611
1612 nir_cf_node *cf_prev = nir_cf_node_prev(&block->cf_node);
1613 if (cf_prev)
1614 return nir_cf_node_cf_tree_last(cf_prev);
1615
1616 nir_cf_node *parent = block->cf_node.parent;
1617
1618 switch (parent->type) {
1619 case nir_cf_node_if: {
1620 /* Are we at the beginning of the else? Go to the end of the if */
1621 nir_if *if_stmt = nir_cf_node_as_if(parent);
1622 if (block == nir_if_first_else_block(if_stmt))
1623 return nir_if_last_then_block(if_stmt);
1624
1625 assert(block == nir_if_first_then_block(if_stmt));
1626 /* fall through */
1627 }
1628
1629 case nir_cf_node_loop:
1630 return nir_cf_node_as_block(nir_cf_node_prev(parent));
1631
1632 case nir_cf_node_function:
1633 return NULL;
1634
1635 default:
1636 unreachable("unknown cf node type");
1637 }
1638 }
1639
1640 nir_block *nir_cf_node_cf_tree_first(nir_cf_node *node)
1641 {
1642 switch (node->type) {
1643 case nir_cf_node_function: {
1644 nir_function_impl *impl = nir_cf_node_as_function(node);
1645 return nir_start_block(impl);
1646 }
1647
1648 case nir_cf_node_if: {
1649 nir_if *if_stmt = nir_cf_node_as_if(node);
1650 return nir_if_first_then_block(if_stmt);
1651 }
1652
1653 case nir_cf_node_loop: {
1654 nir_loop *loop = nir_cf_node_as_loop(node);
1655 return nir_loop_first_block(loop);
1656 }
1657
1658 case nir_cf_node_block: {
1659 return nir_cf_node_as_block(node);
1660 }
1661
1662 default:
1663 unreachable("unknown node type");
1664 }
1665 }
1666
1667 nir_block *nir_cf_node_cf_tree_last(nir_cf_node *node)
1668 {
1669 switch (node->type) {
1670 case nir_cf_node_function: {
1671 nir_function_impl *impl = nir_cf_node_as_function(node);
1672 return nir_impl_last_block(impl);
1673 }
1674
1675 case nir_cf_node_if: {
1676 nir_if *if_stmt = nir_cf_node_as_if(node);
1677 return nir_if_last_else_block(if_stmt);
1678 }
1679
1680 case nir_cf_node_loop: {
1681 nir_loop *loop = nir_cf_node_as_loop(node);
1682 return nir_loop_last_block(loop);
1683 }
1684
1685 case nir_cf_node_block: {
1686 return nir_cf_node_as_block(node);
1687 }
1688
1689 default:
1690 unreachable("unknown node type");
1691 }
1692 }
1693
1694 nir_block *nir_cf_node_cf_tree_next(nir_cf_node *node)
1695 {
1696 if (node->type == nir_cf_node_block)
1697 return nir_block_cf_tree_next(nir_cf_node_as_block(node));
1698 else if (node->type == nir_cf_node_function)
1699 return NULL;
1700 else
1701 return nir_cf_node_as_block(nir_cf_node_next(node));
1702 }
1703
1704 nir_if *
1705 nir_block_get_following_if(nir_block *block)
1706 {
1707 if (exec_node_is_tail_sentinel(&block->cf_node.node))
1708 return NULL;
1709
1710 if (nir_cf_node_is_last(&block->cf_node))
1711 return NULL;
1712
1713 nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
1714
1715 if (next_node->type != nir_cf_node_if)
1716 return NULL;
1717
1718 return nir_cf_node_as_if(next_node);
1719 }
1720
1721 nir_loop *
1722 nir_block_get_following_loop(nir_block *block)
1723 {
1724 if (exec_node_is_tail_sentinel(&block->cf_node.node))
1725 return NULL;
1726
1727 if (nir_cf_node_is_last(&block->cf_node))
1728 return NULL;
1729
1730 nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);
1731
1732 if (next_node->type != nir_cf_node_loop)
1733 return NULL;
1734
1735 return nir_cf_node_as_loop(next_node);
1736 }
1737
1738 void
1739 nir_index_blocks(nir_function_impl *impl)
1740 {
1741 unsigned index = 0;
1742
1743 if (impl->valid_metadata & nir_metadata_block_index)
1744 return;
1745
1746 nir_foreach_block(block, impl) {
1747 block->index = index++;
1748 }
1749
1750 /* The end_block isn't really part of the program, which is why its index
1751 * is >= num_blocks.
1752 */
1753 impl->num_blocks = impl->end_block->index = index;
1754 }
1755
1756 static bool
1757 index_ssa_def_cb(nir_ssa_def *def, void *state)
1758 {
1759 unsigned *index = (unsigned *) state;
1760 def->index = (*index)++;
1761
1762 return true;
1763 }
1764
1765 /**
1766 * The indices are applied top-to-bottom which has the very nice property
1767 * that, if A dominates B, then A->index <= B->index.
1768 */
1769 void
1770 nir_index_ssa_defs(nir_function_impl *impl)
1771 {
1772 unsigned index = 0;
1773
1774 nir_foreach_block(block, impl) {
1775 nir_foreach_instr(instr, block)
1776 nir_foreach_ssa_def(instr, index_ssa_def_cb, &index);
1777 }
1778
1779 impl->ssa_alloc = index;
1780 }
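/* Illustrative example (not from the original file): after indexing, a pass
 * that already knows one of two defs dominates the other can pick the
 * dominated (later) one just by comparing indices:
 *
 *    nir_ssa_def *later = (a->index > b->index) ? a : b;
 */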
1781
1782 /**
1783 * The indices are applied top-to-bottom which has the very nice property
1784 * that, if A dominates B, then A->index <= B->index.
1785 */
1786 unsigned
1787 nir_index_instrs(nir_function_impl *impl)
1788 {
1789 unsigned index = 0;
1790
1791 nir_foreach_block(block, impl) {
1792 nir_foreach_instr(instr, block)
1793 instr->index = index++;
1794 }
1795
1796 return index;
1797 }
1798
1799 nir_intrinsic_op
1800 nir_intrinsic_from_system_value(gl_system_value val)
1801 {
1802 switch (val) {
1803 case SYSTEM_VALUE_VERTEX_ID:
1804 return nir_intrinsic_load_vertex_id;
1805 case SYSTEM_VALUE_INSTANCE_ID:
1806 return nir_intrinsic_load_instance_id;
1807 case SYSTEM_VALUE_DRAW_ID:
1808 return nir_intrinsic_load_draw_id;
1809 case SYSTEM_VALUE_BASE_INSTANCE:
1810 return nir_intrinsic_load_base_instance;
1811 case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
1812 return nir_intrinsic_load_vertex_id_zero_base;
1813 case SYSTEM_VALUE_IS_INDEXED_DRAW:
1814 return nir_intrinsic_load_is_indexed_draw;
1815 case SYSTEM_VALUE_FIRST_VERTEX:
1816 return nir_intrinsic_load_first_vertex;
1817 case SYSTEM_VALUE_BASE_VERTEX:
1818 return nir_intrinsic_load_base_vertex;
1819 case SYSTEM_VALUE_INVOCATION_ID:
1820 return nir_intrinsic_load_invocation_id;
1821 case SYSTEM_VALUE_FRAG_COORD:
1822 return nir_intrinsic_load_frag_coord;
1823 case SYSTEM_VALUE_FRONT_FACE:
1824 return nir_intrinsic_load_front_face;
1825 case SYSTEM_VALUE_SAMPLE_ID:
1826 return nir_intrinsic_load_sample_id;
1827 case SYSTEM_VALUE_SAMPLE_POS:
1828 return nir_intrinsic_load_sample_pos;
1829 case SYSTEM_VALUE_SAMPLE_MASK_IN:
1830 return nir_intrinsic_load_sample_mask_in;
1831 case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
1832 return nir_intrinsic_load_local_invocation_id;
1833 case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
1834 return nir_intrinsic_load_local_invocation_index;
1835 case SYSTEM_VALUE_WORK_GROUP_ID:
1836 return nir_intrinsic_load_work_group_id;
1837 case SYSTEM_VALUE_NUM_WORK_GROUPS:
1838 return nir_intrinsic_load_num_work_groups;
1839 case SYSTEM_VALUE_PRIMITIVE_ID:
1840 return nir_intrinsic_load_primitive_id;
1841 case SYSTEM_VALUE_TESS_COORD:
1842 return nir_intrinsic_load_tess_coord;
1843 case SYSTEM_VALUE_TESS_LEVEL_OUTER:
1844 return nir_intrinsic_load_tess_level_outer;
1845 case SYSTEM_VALUE_TESS_LEVEL_INNER:
1846 return nir_intrinsic_load_tess_level_inner;
1847 case SYSTEM_VALUE_VERTICES_IN:
1848 return nir_intrinsic_load_patch_vertices_in;
1849 case SYSTEM_VALUE_HELPER_INVOCATION:
1850 return nir_intrinsic_load_helper_invocation;
1851 case SYSTEM_VALUE_VIEW_INDEX:
1852 return nir_intrinsic_load_view_index;
1853 case SYSTEM_VALUE_SUBGROUP_SIZE:
1854 return nir_intrinsic_load_subgroup_size;
1855 case SYSTEM_VALUE_SUBGROUP_INVOCATION:
1856 return nir_intrinsic_load_subgroup_invocation;
1857 case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
1858 return nir_intrinsic_load_subgroup_eq_mask;
1859 case SYSTEM_VALUE_SUBGROUP_GE_MASK:
1860 return nir_intrinsic_load_subgroup_ge_mask;
1861 case SYSTEM_VALUE_SUBGROUP_GT_MASK:
1862 return nir_intrinsic_load_subgroup_gt_mask;
1863 case SYSTEM_VALUE_SUBGROUP_LE_MASK:
1864 return nir_intrinsic_load_subgroup_le_mask;
1865 case SYSTEM_VALUE_SUBGROUP_LT_MASK:
1866 return nir_intrinsic_load_subgroup_lt_mask;
1867 case SYSTEM_VALUE_NUM_SUBGROUPS:
1868 return nir_intrinsic_load_num_subgroups;
1869 case SYSTEM_VALUE_SUBGROUP_ID:
1870 return nir_intrinsic_load_subgroup_id;
1871 case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
1872 return nir_intrinsic_load_local_group_size;
1873 case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
1874 return nir_intrinsic_load_global_invocation_id;
1875 case SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX:
1876 return nir_intrinsic_load_global_invocation_index;
1877 case SYSTEM_VALUE_WORK_DIM:
1878 return nir_intrinsic_load_work_dim;
1879 default:
1880 unreachable("system value does not directly correspond to intrinsic");
1881 }
1882 }
1883
1884 gl_system_value
1885 nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
1886 {
1887 switch (intrin) {
1888 case nir_intrinsic_load_vertex_id:
1889 return SYSTEM_VALUE_VERTEX_ID;
1890 case nir_intrinsic_load_instance_id:
1891 return SYSTEM_VALUE_INSTANCE_ID;
1892 case nir_intrinsic_load_draw_id:
1893 return SYSTEM_VALUE_DRAW_ID;
1894 case nir_intrinsic_load_base_instance:
1895 return SYSTEM_VALUE_BASE_INSTANCE;
1896 case nir_intrinsic_load_vertex_id_zero_base:
1897 return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
1898 case nir_intrinsic_load_first_vertex:
1899 return SYSTEM_VALUE_FIRST_VERTEX;
1900 case nir_intrinsic_load_is_indexed_draw:
1901 return SYSTEM_VALUE_IS_INDEXED_DRAW;
1902 case nir_intrinsic_load_base_vertex:
1903 return SYSTEM_VALUE_BASE_VERTEX;
1904 case nir_intrinsic_load_invocation_id:
1905 return SYSTEM_VALUE_INVOCATION_ID;
1906 case nir_intrinsic_load_frag_coord:
1907 return SYSTEM_VALUE_FRAG_COORD;
1908 case nir_intrinsic_load_front_face:
1909 return SYSTEM_VALUE_FRONT_FACE;
1910 case nir_intrinsic_load_sample_id:
1911 return SYSTEM_VALUE_SAMPLE_ID;
1912 case nir_intrinsic_load_sample_pos:
1913 return SYSTEM_VALUE_SAMPLE_POS;
1914 case nir_intrinsic_load_sample_mask_in:
1915 return SYSTEM_VALUE_SAMPLE_MASK_IN;
1916 case nir_intrinsic_load_local_invocation_id:
1917 return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
1918 case nir_intrinsic_load_local_invocation_index:
1919 return SYSTEM_VALUE_LOCAL_INVOCATION_INDEX;
1920 case nir_intrinsic_load_num_work_groups:
1921 return SYSTEM_VALUE_NUM_WORK_GROUPS;
1922 case nir_intrinsic_load_work_group_id:
1923 return SYSTEM_VALUE_WORK_GROUP_ID;
1924 case nir_intrinsic_load_primitive_id:
1925 return SYSTEM_VALUE_PRIMITIVE_ID;
1926 case nir_intrinsic_load_tess_coord:
1927 return SYSTEM_VALUE_TESS_COORD;
1928 case nir_intrinsic_load_tess_level_outer:
1929 return SYSTEM_VALUE_TESS_LEVEL_OUTER;
1930 case nir_intrinsic_load_tess_level_inner:
1931 return SYSTEM_VALUE_TESS_LEVEL_INNER;
1932 case nir_intrinsic_load_patch_vertices_in:
1933 return SYSTEM_VALUE_VERTICES_IN;
1934 case nir_intrinsic_load_helper_invocation:
1935 return SYSTEM_VALUE_HELPER_INVOCATION;
1936 case nir_intrinsic_load_view_index:
1937 return SYSTEM_VALUE_VIEW_INDEX;
1938 case nir_intrinsic_load_subgroup_size:
1939 return SYSTEM_VALUE_SUBGROUP_SIZE;
1940 case nir_intrinsic_load_subgroup_invocation:
1941 return SYSTEM_VALUE_SUBGROUP_INVOCATION;
1942 case nir_intrinsic_load_subgroup_eq_mask:
1943 return SYSTEM_VALUE_SUBGROUP_EQ_MASK;
1944 case nir_intrinsic_load_subgroup_ge_mask:
1945 return SYSTEM_VALUE_SUBGROUP_GE_MASK;
1946 case nir_intrinsic_load_subgroup_gt_mask:
1947 return SYSTEM_VALUE_SUBGROUP_GT_MASK;
1948 case nir_intrinsic_load_subgroup_le_mask:
1949 return SYSTEM_VALUE_SUBGROUP_LE_MASK;
1950 case nir_intrinsic_load_subgroup_lt_mask:
1951 return SYSTEM_VALUE_SUBGROUP_LT_MASK;
1952 case nir_intrinsic_load_num_subgroups:
1953 return SYSTEM_VALUE_NUM_SUBGROUPS;
1954 case nir_intrinsic_load_subgroup_id:
1955 return SYSTEM_VALUE_SUBGROUP_ID;
1956 case nir_intrinsic_load_local_group_size:
1957 return SYSTEM_VALUE_LOCAL_GROUP_SIZE;
1958 case nir_intrinsic_load_global_invocation_id:
1959 return SYSTEM_VALUE_GLOBAL_INVOCATION_ID;
1960 default:
1961 unreachable("intrinsic doesn't produce a system value");
1962 }
1963 }
1964
1965 /* OpenGL utility method that remaps attribute locations when the inputs are
1966 * doubles. Not needed for Vulkan due to the differences in input location
1967 * counts for doubles between Vulkan and OpenGL.
1968 *
1969 * The bitfield returned in dual_slot is one bit for each double input slot in
1970 * the original OpenGL single-slot input numbering. The mapping from old
1971 * locations to new locations is as follows:
1972 *
1973 * new_loc = loc + util_bitcount(dual_slot & BITFIELD64_MASK(loc))
1974 */
1975 void
1976 nir_remap_dual_slot_attributes(nir_shader *shader, uint64_t *dual_slot)
1977 {
1978 assert(shader->info.stage == MESA_SHADER_VERTEX);
1979
1980 *dual_slot = 0;
1981 nir_foreach_variable(var, &shader->inputs) {
1982 if (glsl_type_is_dual_slot(glsl_without_array(var->type))) {
1983 unsigned slots = glsl_count_attribute_slots(var->type, true);
1984 *dual_slot |= BITFIELD64_MASK(slots) << var->data.location;
1985 }
1986 }
1987
1988 nir_foreach_variable(var, &shader->inputs) {
1989 var->data.location +=
1990 util_bitcount64(*dual_slot & BITFIELD64_MASK(var->data.location));
1991 }
1992 }
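/* Illustrative example (not from the original file): with GL-style vertex
 * inputs vec4 at location 0, dvec4 at location 1 and vec4 at location 2, the
 * dvec4 is dual-slot, so dual_slot ends up as 0b010.  Applying
 * new_loc = loc + util_bitcount(dual_slot & BITFIELD64_MASK(loc)) maps
 * 0 -> 0, 1 -> 1 and 2 -> 3, leaving location 2 free for the second half of
 * the dvec4.
 */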
1993
1994 /* Returns an attribute mask that has been re-compacted using the given
1995 * dual_slot mask.
1996 */
1997 uint64_t
1998 nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot)
1999 {
2000 while (dual_slot) {
2001 unsigned loc = u_bit_scan64(&dual_slot);
2002 /* mask of all bits up to and including loc */
2003 uint64_t mask = BITFIELD64_MASK(loc + 1);
2004 attribs = (attribs & mask) | ((attribs & ~mask) >> 1);
2005 }
2006 return attribs;
2007 }
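/* Illustrative example (not from the original file), continuing the case
 * above: with dual_slot = 0b010 and an expanded attribs mask of 0b1111
 * (location 0, both halves of the dvec4 at 1-2, and the vec4 remapped to 3),
 * the loop folds the second dual-slot bit away and returns 0b0111, i.e. the
 * original single-slot locations 0, 1 and 2.
 */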
2008
2009 void
2010 nir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, nir_ssa_def *src,
2011 bool bindless)
2012 {
2013 switch (intrin->intrinsic) {
2014 #define CASE(op) \
2015 case nir_intrinsic_image_deref_##op: \
2016 intrin->intrinsic = bindless ? nir_intrinsic_bindless_image_##op \
2017 : nir_intrinsic_image_##op; \
2018 break;
2019 CASE(load)
2020 CASE(store)
2021 CASE(atomic_add)
2022 CASE(atomic_min)
2023 CASE(atomic_max)
2024 CASE(atomic_and)
2025 CASE(atomic_or)
2026 CASE(atomic_xor)
2027 CASE(atomic_exchange)
2028 CASE(atomic_comp_swap)
2029 CASE(atomic_fadd)
2030 CASE(size)
2031 CASE(samples)
2032 CASE(load_raw_intel)
2033 CASE(store_raw_intel)
2034 #undef CASE
2035 default:
2036 unreachable("Unhandled image intrinsic");
2037 }
2038
2039 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2040 nir_variable *var = nir_deref_instr_get_variable(deref);
2041
2042 nir_intrinsic_set_image_dim(intrin, glsl_get_sampler_dim(deref->type));
2043 nir_intrinsic_set_image_array(intrin, glsl_sampler_type_is_array(deref->type));
2044 nir_intrinsic_set_access(intrin, var->data.image.access);
2045 nir_intrinsic_set_format(intrin, var->data.image.format);
2046
2047 nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
2048 nir_src_for_ssa(src));
2049 }