/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *
 */

#include "nir.h"
#include "nir_control_flow_private.h"
#include "util/half_float.h"
#include <limits.h>
#include <assert.h>
#include <math.h>
#include "util/u_math.h"

#include "main/menums.h" /* BITFIELD64_MASK */

nir_shader *
nir_shader_create(void *mem_ctx,
                  gl_shader_stage stage,
                  const nir_shader_compiler_options *options,
                  shader_info *si)
{
   nir_shader *shader = rzalloc(mem_ctx, nir_shader);

   exec_list_make_empty(&shader->uniforms);
   exec_list_make_empty(&shader->inputs);
   exec_list_make_empty(&shader->outputs);
   exec_list_make_empty(&shader->shared);

   shader->options = options;

   if (si) {
      assert(si->stage == stage);
      shader->info = *si;
   } else {
      shader->info.stage = stage;
   }

   exec_list_make_empty(&shader->functions);
   exec_list_make_empty(&shader->registers);
   exec_list_make_empty(&shader->globals);
   exec_list_make_empty(&shader->system_values);
   shader->reg_alloc = 0;

   shader->num_inputs = 0;
   shader->num_outputs = 0;
   shader->num_uniforms = 0;
   shader->num_shared = 0;

   return shader;
}
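
/* Example (illustrative, not part of this file): a driver would typically
 * create a fresh vertex-shader container along these lines, assuming it
 * keeps per-stage compiler options around (the vs_options field below is
 * hypothetical):
 *
 *    nir_shader *s = nir_shader_create(NULL, MESA_SHADER_VERTEX,
 *                                      compiler->vs_options, NULL);
 *
 * Passing NULL for si leaves shader->info zeroed except for the stage.
 */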

static nir_register *
reg_create(void *mem_ctx, struct exec_list *list)
{
   nir_register *reg = ralloc(mem_ctx, nir_register);

   list_inithead(&reg->uses);
   list_inithead(&reg->defs);
   list_inithead(&reg->if_uses);

   reg->num_components = 0;
   reg->bit_size = 32;
   reg->num_array_elems = 0;
   reg->is_packed = false;
   reg->name = NULL;

   exec_list_push_tail(list, &reg->node);

   return reg;
}

nir_register *
nir_global_reg_create(nir_shader *shader)
{
   nir_register *reg = reg_create(shader, &shader->registers);
   reg->index = shader->reg_alloc++;
   reg->is_global = true;

   return reg;
}

nir_register *
nir_local_reg_create(nir_function_impl *impl)
{
   nir_register *reg = reg_create(ralloc_parent(impl), &impl->registers);
   reg->index = impl->reg_alloc++;
   reg->is_global = false;

   return reg;
}

void
nir_reg_remove(nir_register *reg)
{
   exec_node_remove(&reg->node);
}

void
nir_shader_add_variable(nir_shader *shader, nir_variable *var)
{
   switch (var->data.mode) {
   case nir_var_all:
      assert(!"invalid mode");
      break;

   case nir_var_local:
      assert(!"nir_shader_add_variable cannot be used for local variables");
      break;

   case nir_var_global:
      exec_list_push_tail(&shader->globals, &var->node);
      break;

   case nir_var_shader_in:
      exec_list_push_tail(&shader->inputs, &var->node);
      break;

   case nir_var_shader_out:
      exec_list_push_tail(&shader->outputs, &var->node);
      break;

   case nir_var_uniform:
   case nir_var_shader_storage:
      exec_list_push_tail(&shader->uniforms, &var->node);
      break;

   case nir_var_shared:
      assert(shader->info.stage == MESA_SHADER_COMPUTE);
      exec_list_push_tail(&shader->shared, &var->node);
      break;

   case nir_var_system_value:
      exec_list_push_tail(&shader->system_values, &var->node);
      break;
   }
}

nir_variable *
nir_variable_create(nir_shader *shader, nir_variable_mode mode,
                    const struct glsl_type *type, const char *name)
{
   nir_variable *var = rzalloc(shader, nir_variable);
   var->name = ralloc_strdup(var, name);
   var->type = type;
   var->data.mode = mode;
   var->data.how_declared = nir_var_declared_normally;

   if ((mode == nir_var_shader_in &&
        shader->info.stage != MESA_SHADER_VERTEX) ||
       (mode == nir_var_shader_out &&
        shader->info.stage != MESA_SHADER_FRAGMENT))
      var->data.interpolation = INTERP_MODE_SMOOTH;

   if (mode == nir_var_shader_in || mode == nir_var_uniform)
      var->data.read_only = true;

   nir_shader_add_variable(shader, var);

   return var;
}

nir_variable *
nir_local_variable_create(nir_function_impl *impl,
                          const struct glsl_type *type, const char *name)
{
   nir_variable *var = rzalloc(impl->function->shader, nir_variable);
   var->name = ralloc_strdup(var, name);
   var->type = type;
   var->data.mode = nir_var_local;

   nir_function_impl_add_variable(impl, var);

   return var;
}

nir_function *
nir_function_create(nir_shader *shader, const char *name)
{
   nir_function *func = ralloc(shader, nir_function);

   exec_list_push_tail(&shader->functions, &func->node);

   func->name = ralloc_strdup(func, name);
   func->shader = shader;
   func->num_params = 0;
   func->params = NULL;
   func->impl = NULL;

   return func;
}

/* NOTE: if the instruction you are copying a src to is already added
 * to the IR, use nir_instr_rewrite_src() instead.
 */
void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx)
{
   dest->is_ssa = src->is_ssa;
   if (src->is_ssa) {
      dest->ssa = src->ssa;
   } else {
      dest->reg.base_offset = src->reg.base_offset;
      dest->reg.reg = src->reg.reg;
      if (src->reg.indirect) {
         dest->reg.indirect = ralloc(mem_ctx, nir_src);
         nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx);
      } else {
         dest->reg.indirect = NULL;
      }
   }
}
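
/* Example (illustrative; new_intrin, old_intrin and some_src are
 * placeholders): the distinction in the NOTE above matters because
 * nir_instr_rewrite_src() also maintains the use lists. For an instruction
 * that is still under construction and not yet inserted, copying is fine:
 *
 *    nir_src_copy(&new_intrin->src[0], &old_intrin->src[0], new_intrin);
 *
 * whereas a source on an instruction already in the IR must be rewritten:
 *
 *    nir_instr_rewrite_src(&intrin->instr, &intrin->src[0], some_src);
 */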

void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr)
{
   /* Copying an SSA definition makes no sense whatsoever. */
   assert(!src->is_ssa);

   dest->is_ssa = false;

   dest->reg.base_offset = src->reg.base_offset;
   dest->reg.reg = src->reg.reg;
   if (src->reg.indirect) {
      dest->reg.indirect = ralloc(instr, nir_src);
      nir_src_copy(dest->reg.indirect, src->reg.indirect, instr);
   } else {
      dest->reg.indirect = NULL;
   }
}

void
nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
                 nir_alu_instr *instr)
{
   nir_src_copy(&dest->src, &src->src, &instr->instr);
   dest->abs = src->abs;
   dest->negate = src->negate;
   for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
      dest->swizzle[i] = src->swizzle[i];
}

void
nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
                  nir_alu_instr *instr)
{
   nir_dest_copy(&dest->dest, &src->dest, &instr->instr);
   dest->write_mask = src->write_mask;
   dest->saturate = src->saturate;
}


static void
cf_init(nir_cf_node *node, nir_cf_node_type type)
{
   exec_node_init(&node->node);
   node->parent = NULL;
   node->type = type;
}

nir_function_impl *
nir_function_impl_create_bare(nir_shader *shader)
{
   nir_function_impl *impl = ralloc(shader, nir_function_impl);

   impl->function = NULL;

   cf_init(&impl->cf_node, nir_cf_node_function);

   exec_list_make_empty(&impl->body);
   exec_list_make_empty(&impl->registers);
   exec_list_make_empty(&impl->locals);
   impl->reg_alloc = 0;
   impl->ssa_alloc = 0;
   impl->valid_metadata = nir_metadata_none;

   /* create start & end blocks */
   nir_block *start_block = nir_block_create(shader);
   nir_block *end_block = nir_block_create(shader);
   start_block->cf_node.parent = &impl->cf_node;
   end_block->cf_node.parent = &impl->cf_node;
   impl->end_block = end_block;

   exec_list_push_tail(&impl->body, &start_block->cf_node.node);

   start_block->successors[0] = end_block;
   _mesa_set_add(end_block->predecessors, start_block);
   return impl;
}

nir_function_impl *
nir_function_impl_create(nir_function *function)
{
   assert(function->impl == NULL);

   nir_function_impl *impl = nir_function_impl_create_bare(function->shader);

   function->impl = impl;
   impl->function = function;

   return impl;
}
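
/* Example (illustrative): the usual way to get an entry point ready for
 * emitting code is to pair a function with an implementation:
 *
 *    nir_function *fn = nir_function_create(shader, "main");
 *    nir_function_impl *impl = nir_function_impl_create(fn);
 *
 * The bare variant above is for callers, such as IR cloning, that hook the
 * impl up to a function themselves.
 */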

nir_block *
nir_block_create(nir_shader *shader)
{
   nir_block *block = rzalloc(shader, nir_block);

   cf_init(&block->cf_node, nir_cf_node_block);

   block->successors[0] = block->successors[1] = NULL;
   block->predecessors = _mesa_set_create(block, _mesa_hash_pointer,
                                          _mesa_key_pointer_equal);
   block->imm_dom = NULL;
   /* XXX maybe it would be worth it to defer allocation? That way it
    * wouldn't get allocated for shader refs that never run
    * nir_calc_dominance. For example, the state tracker creates an initial
    * IR, clones it, runs the appropriate lowering passes, and hands it to
    * the driver, which does common lowering/opt and then stores a ref that
    * is later used for state-specific lowering and further opt. Do any of
    * those references not need dominance metadata?
    */
   block->dom_frontier = _mesa_set_create(block, _mesa_hash_pointer,
                                          _mesa_key_pointer_equal);

   exec_list_make_empty(&block->instr_list);

   return block;
}

static inline void
src_init(nir_src *src)
{
   src->is_ssa = false;
   src->reg.reg = NULL;
   src->reg.indirect = NULL;
   src->reg.base_offset = 0;
}

nir_if *
nir_if_create(nir_shader *shader)
{
   nir_if *if_stmt = ralloc(shader, nir_if);

   cf_init(&if_stmt->cf_node, nir_cf_node_if);
   src_init(&if_stmt->condition);

   nir_block *then = nir_block_create(shader);
   exec_list_make_empty(&if_stmt->then_list);
   exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node);
   then->cf_node.parent = &if_stmt->cf_node;

   nir_block *else_stmt = nir_block_create(shader);
   exec_list_make_empty(&if_stmt->else_list);
   exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node);
   else_stmt->cf_node.parent = &if_stmt->cf_node;

   return if_stmt;
}

nir_loop *
nir_loop_create(nir_shader *shader)
{
   nir_loop *loop = rzalloc(shader, nir_loop);

   cf_init(&loop->cf_node, nir_cf_node_loop);

   nir_block *body = nir_block_create(shader);
   exec_list_make_empty(&loop->body);
   exec_list_push_tail(&loop->body, &body->cf_node.node);
   body->cf_node.parent = &loop->cf_node;

   body->successors[0] = body;
   _mesa_set_add(body->predecessors, body);

   return loop;
}

static void
instr_init(nir_instr *instr, nir_instr_type type)
{
   instr->type = type;
   instr->block = NULL;
   exec_node_init(&instr->node);
}

static void
dest_init(nir_dest *dest)
{
   dest->is_ssa = false;
   dest->reg.reg = NULL;
   dest->reg.indirect = NULL;
   dest->reg.base_offset = 0;
}

static void
alu_dest_init(nir_alu_dest *dest)
{
   dest_init(&dest->dest);
   dest->saturate = false;
   dest->write_mask = 0xf;
}

static void
alu_src_init(nir_alu_src *src)
{
   src_init(&src->src);
   src->abs = src->negate = false;
   for (int i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i)
      src->swizzle[i] = i;
}

nir_alu_instr *
nir_alu_instr_create(nir_shader *shader, nir_op op)
{
   unsigned num_srcs = nir_op_infos[op].num_inputs;
   /* TODO: don't use rzalloc */
   nir_alu_instr *instr =
      rzalloc_size(shader,
                   sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));

   instr_init(&instr->instr, nir_instr_type_alu);
   instr->op = op;
   alu_dest_init(&instr->dest);
   for (unsigned i = 0; i < num_srcs; i++)
      alu_src_init(&instr->src[i]);

   return instr;
}
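
/* Example (illustrative): hand-building an fadd, assuming a and b are
 * existing single-component 32-bit nir_ssa_defs and some_instr marks the
 * insertion point. Most passes would use the nir_builder helpers instead:
 *
 *    nir_alu_instr *add = nir_alu_instr_create(shader, nir_op_fadd);
 *    add->src[0].src = nir_src_for_ssa(a);
 *    add->src[1].src = nir_src_for_ssa(b);
 *    nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, 32, NULL);
 *    nir_instr_insert(nir_after_instr(some_instr), &add->instr);
 */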

nir_deref_instr *
nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
{
   nir_deref_instr *instr =
      rzalloc_size(shader, sizeof(nir_deref_instr));

   instr_init(&instr->instr, nir_instr_type_deref);

   instr->deref_type = deref_type;
   if (deref_type != nir_deref_type_var)
      src_init(&instr->parent);

   if (deref_type == nir_deref_type_array)
      src_init(&instr->arr.index);

   dest_init(&instr->dest);

   return instr;
}

nir_jump_instr *
nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
{
   nir_jump_instr *instr = ralloc(shader, nir_jump_instr);
   instr_init(&instr->instr, nir_instr_type_jump);
   instr->type = type;
   return instr;
}

nir_load_const_instr *
nir_load_const_instr_create(nir_shader *shader, unsigned num_components,
                            unsigned bit_size)
{
   nir_load_const_instr *instr = rzalloc(shader, nir_load_const_instr);
   instr_init(&instr->instr, nir_instr_type_load_const);

   nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size, NULL);

   return instr;
}

nir_intrinsic_instr *
nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
{
   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   /* TODO: don't use rzalloc */
   nir_intrinsic_instr *instr =
      rzalloc_size(shader,
                   sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));

   instr_init(&instr->instr, nir_instr_type_intrinsic);
   instr->intrinsic = op;

   if (nir_intrinsic_infos[op].has_dest)
      dest_init(&instr->dest);

   for (unsigned i = 0; i < num_srcs; i++)
      src_init(&instr->src[i]);

   return instr;
}

nir_call_instr *
nir_call_instr_create(nir_shader *shader, nir_function *callee)
{
   const unsigned num_params = callee->num_params;
   nir_call_instr *instr =
      rzalloc_size(shader, sizeof(*instr) +
                   num_params * sizeof(instr->params[0]));

   instr_init(&instr->instr, nir_instr_type_call);
   instr->callee = callee;
   instr->num_params = num_params;
   for (unsigned i = 0; i < num_params; i++)
      src_init(&instr->params[i]);

   return instr;
}

nir_tex_instr *
nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
{
   nir_tex_instr *instr = rzalloc(shader, nir_tex_instr);
   instr_init(&instr->instr, nir_instr_type_tex);

   dest_init(&instr->dest);

   instr->num_srcs = num_srcs;
   instr->src = ralloc_array(instr, nir_tex_src, num_srcs);
   for (unsigned i = 0; i < num_srcs; i++)
      src_init(&instr->src[i].src);

   instr->texture_index = 0;
   instr->texture_array_size = 0;
   instr->sampler_index = 0;

   return instr;
}

void
nir_tex_instr_add_src(nir_tex_instr *tex,
                      nir_tex_src_type src_type,
                      nir_src src)
{
   nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src,
                                         tex->num_srcs + 1);

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      new_srcs[i].src_type = tex->src[i].src_type;
      nir_instr_move_src(&tex->instr, &new_srcs[i].src,
                         &tex->src[i].src);
   }

   ralloc_free(tex->src);
   tex->src = new_srcs;

   tex->src[tex->num_srcs].src_type = src_type;
   nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs].src, src);
   tex->num_srcs++;
}
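
/* Example (illustrative): appending an explicit LOD source to an existing
 * texture instruction, assuming lod is a suitable nir_ssa_def:
 *
 *    nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod));
 *
 * Because the source array is reallocated, any pointer into tex->src held
 * across this call becomes stale.
 */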

void
nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx)
{
   assert(src_idx < tex->num_srcs);

   /* First rewrite the source to NIR_SRC_INIT */
   nir_instr_rewrite_src(&tex->instr, &tex->src[src_idx].src, NIR_SRC_INIT);

   /* Now, move all of the other sources down */
   for (unsigned i = src_idx + 1; i < tex->num_srcs; i++) {
      tex->src[i-1].src_type = tex->src[i].src_type;
      nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src);
   }
   tex->num_srcs--;
}

nir_phi_instr *
nir_phi_instr_create(nir_shader *shader)
{
   nir_phi_instr *instr = ralloc(shader, nir_phi_instr);
   instr_init(&instr->instr, nir_instr_type_phi);

   dest_init(&instr->dest);
   exec_list_make_empty(&instr->srcs);
   return instr;
}

nir_parallel_copy_instr *
nir_parallel_copy_instr_create(nir_shader *shader)
{
   nir_parallel_copy_instr *instr = ralloc(shader, nir_parallel_copy_instr);
   instr_init(&instr->instr, nir_instr_type_parallel_copy);

   exec_list_make_empty(&instr->entries);

   return instr;
}

nir_ssa_undef_instr *
nir_ssa_undef_instr_create(nir_shader *shader,
                           unsigned num_components,
                           unsigned bit_size)
{
   nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr);
   instr_init(&instr->instr, nir_instr_type_ssa_undef);

   nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size, NULL);

   return instr;
}

static nir_const_value
const_value_float(double d, unsigned bit_size)
{
   nir_const_value v;
   switch (bit_size) {
   case 16: v.u16[0] = _mesa_float_to_half(d); break;
   case 32: v.f32[0] = d;                      break;
   case 64: v.f64[0] = d;                      break;
   default:
      unreachable("Invalid bit size");
   }
   return v;
}

static nir_const_value
const_value_int(int64_t i, unsigned bit_size)
{
   nir_const_value v;
   switch (bit_size) {
   case 1:  v.b[0]   = i & 1; break;
   case 8:  v.i8[0]  = i;     break;
   case 16: v.i16[0] = i;     break;
   case 32: v.i32[0] = i;     break;
   case 64: v.i64[0] = i;     break;
   default:
      unreachable("Invalid bit size");
   }
   return v;
}

nir_const_value
nir_alu_binop_identity(nir_op binop, unsigned bit_size)
{
   const int64_t max_int = (1ull << (bit_size - 1)) - 1;
   const int64_t min_int = -max_int - 1;
   switch (binop) {
   case nir_op_iadd:
      return const_value_int(0, bit_size);
   case nir_op_fadd:
      return const_value_float(0, bit_size);
   case nir_op_imul:
      return const_value_int(1, bit_size);
   case nir_op_fmul:
      return const_value_float(1, bit_size);
   case nir_op_imin:
      return const_value_int(max_int, bit_size);
   case nir_op_umin:
      return const_value_int(~0ull, bit_size);
   case nir_op_fmin:
      return const_value_float(INFINITY, bit_size);
   case nir_op_imax:
      return const_value_int(min_int, bit_size);
   case nir_op_umax:
      return const_value_int(0, bit_size);
   case nir_op_fmax:
      return const_value_float(-INFINITY, bit_size);
   case nir_op_iand:
      return const_value_int(~0ull, bit_size);
   case nir_op_ior:
      return const_value_int(0, bit_size);
   case nir_op_ixor:
      return const_value_int(0, bit_size);
   default:
      unreachable("Invalid reduction operation");
   }
}
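
/* Worked example: for a 32-bit imin reduction the identity is INT32_MAX
 * (0x7fffffff), since min(INT32_MAX, x) == x for every x. Seeding a scan
 * or reduction with these identities leaves the result unchanged for lanes
 * that contribute nothing.
 */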

nir_function_impl *
nir_cf_node_get_function(nir_cf_node *node)
{
   while (node->type != nir_cf_node_function) {
      node = node->parent;
   }

   return nir_cf_node_as_function(node);
}

/* Reduces a cursor by trying to convert everything to after and trying to
 * go up to block granularity when possible.
 */
static nir_cursor
reduce_cursor(nir_cursor cursor)
{
   switch (cursor.option) {
   case nir_cursor_before_block:
      assert(nir_cf_node_prev(&cursor.block->cf_node) == NULL ||
             nir_cf_node_prev(&cursor.block->cf_node)->type != nir_cf_node_block);
      if (exec_list_is_empty(&cursor.block->instr_list)) {
         /* Empty block. After is as good as before. */
         cursor.option = nir_cursor_after_block;
      }
      return cursor;

   case nir_cursor_after_block:
      return cursor;

   case nir_cursor_before_instr: {
      nir_instr *prev_instr = nir_instr_prev(cursor.instr);
      if (prev_instr) {
         /* Before this instruction is after the previous */
         cursor.instr = prev_instr;
         cursor.option = nir_cursor_after_instr;
      } else {
         /* No previous instruction. Switch to before block */
         cursor.block = cursor.instr->block;
         cursor.option = nir_cursor_before_block;
      }
      return reduce_cursor(cursor);
   }

   case nir_cursor_after_instr:
      if (nir_instr_next(cursor.instr) == NULL) {
         /* This is the last instruction, switch to after block */
         cursor.option = nir_cursor_after_block;
         cursor.block = cursor.instr->block;
      }
      return cursor;

   default:
      unreachable("Invalid cursor option");
   }
}

bool
nir_cursors_equal(nir_cursor a, nir_cursor b)
{
   /* Reduced cursors should be unique */
   a = reduce_cursor(a);
   b = reduce_cursor(b);

   return a.block == b.block && a.option == b.option;
}

static bool
add_use_cb(nir_src *src, void *state)
{
   nir_instr *instr = state;

   src->parent_instr = instr;
   list_addtail(&src->use_link,
                src->is_ssa ? &src->ssa->uses : &src->reg.reg->uses);

   return true;
}

static bool
add_ssa_def_cb(nir_ssa_def *def, void *state)
{
   nir_instr *instr = state;

   if (instr->block && def->index == UINT_MAX) {
      nir_function_impl *impl =
         nir_cf_node_get_function(&instr->block->cf_node);

      def->index = impl->ssa_alloc++;
   }

   return true;
}

static bool
add_reg_def_cb(nir_dest *dest, void *state)
{
   nir_instr *instr = state;

   if (!dest->is_ssa) {
      dest->reg.parent_instr = instr;
      list_addtail(&dest->reg.def_link, &dest->reg.reg->defs);
   }

   return true;
}

static void
add_defs_uses(nir_instr *instr)
{
   nir_foreach_src(instr, add_use_cb, instr);
   nir_foreach_dest(instr, add_reg_def_cb, instr);
   nir_foreach_ssa_def(instr, add_ssa_def_cb, instr);
}

void
nir_instr_insert(nir_cursor cursor, nir_instr *instr)
{
   switch (cursor.option) {
   case nir_cursor_before_block:
      /* Only allow inserting jumps into empty blocks. */
      if (instr->type == nir_instr_type_jump)
         assert(exec_list_is_empty(&cursor.block->instr_list));

      instr->block = cursor.block;
      add_defs_uses(instr);
      exec_list_push_head(&cursor.block->instr_list, &instr->node);
      break;
   case nir_cursor_after_block: {
      /* Inserting instructions after a jump is illegal. */
      nir_instr *last = nir_block_last_instr(cursor.block);
      assert(last == NULL || last->type != nir_instr_type_jump);
      (void) last;

      instr->block = cursor.block;
      add_defs_uses(instr);
      exec_list_push_tail(&cursor.block->instr_list, &instr->node);
      break;
   }
   case nir_cursor_before_instr:
      assert(instr->type != nir_instr_type_jump);
      instr->block = cursor.instr->block;
      add_defs_uses(instr);
      exec_node_insert_node_before(&cursor.instr->node, &instr->node);
      break;
   case nir_cursor_after_instr:
      /* Inserting instructions after a jump is illegal. */
      assert(cursor.instr->type != nir_instr_type_jump);

      /* Only allow inserting jumps at the end of the block. */
      if (instr->type == nir_instr_type_jump)
         assert(cursor.instr == nir_block_last_instr(cursor.instr->block));

      instr->block = cursor.instr->block;
      add_defs_uses(instr);
      exec_node_insert_after(&cursor.instr->node, &instr->node);
      break;
   }

   if (instr->type == nir_instr_type_jump)
      nir_handle_add_jump(instr->block);
}
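
/* Example (illustrative): cursors are normally built with the inline
 * helpers from nir.h rather than by filling in a nir_cursor directly:
 *
 *    nir_instr_insert(nir_before_instr(&intrin->instr), &new_instr->instr);
 *    nir_instr_insert(nir_after_block(block), &other_instr->instr);
 *
 * Both forms land in the switch above, with add_defs_uses() wiring up the
 * new instruction's use/def lists.
 */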

static bool
src_is_valid(const nir_src *src)
{
   return src->is_ssa ? (src->ssa != NULL) : (src->reg.reg != NULL);
}

static bool
remove_use_cb(nir_src *src, void *state)
{
   (void) state;

   if (src_is_valid(src))
      list_del(&src->use_link);

   return true;
}

static bool
remove_def_cb(nir_dest *dest, void *state)
{
   (void) state;

   if (!dest->is_ssa)
      list_del(&dest->reg.def_link);

   return true;
}

static void
remove_defs_uses(nir_instr *instr)
{
   nir_foreach_dest(instr, remove_def_cb, instr);
   nir_foreach_src(instr, remove_use_cb, instr);
}

void nir_instr_remove_v(nir_instr *instr)
{
   remove_defs_uses(instr);
   exec_node_remove(&instr->node);

   if (instr->type == nir_instr_type_jump) {
      nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
      nir_handle_remove_jump(instr->block, jump_instr->type);
   }
}

/*@}*/

void
nir_index_local_regs(nir_function_impl *impl)
{
   unsigned index = 0;
   foreach_list_typed(nir_register, reg, node, &impl->registers) {
      reg->index = index++;
   }
   impl->reg_alloc = index;
}

void
nir_index_global_regs(nir_shader *shader)
{
   unsigned index = 0;
   foreach_list_typed(nir_register, reg, node, &shader->registers) {
      reg->index = index++;
   }
   shader->reg_alloc = index;
}

static bool
visit_alu_dest(nir_alu_instr *instr, nir_foreach_dest_cb cb, void *state)
{
   return cb(&instr->dest.dest, state);
}

static bool
visit_deref_dest(nir_deref_instr *instr, nir_foreach_dest_cb cb, void *state)
{
   return cb(&instr->dest, state);
}

static bool
visit_intrinsic_dest(nir_intrinsic_instr *instr, nir_foreach_dest_cb cb,
                     void *state)
{
   if (nir_intrinsic_infos[instr->intrinsic].has_dest)
      return cb(&instr->dest, state);

   return true;
}

static bool
visit_texture_dest(nir_tex_instr *instr, nir_foreach_dest_cb cb,
                   void *state)
{
   return cb(&instr->dest, state);
}

static bool
visit_phi_dest(nir_phi_instr *instr, nir_foreach_dest_cb cb, void *state)
{
   return cb(&instr->dest, state);
}

static bool
visit_parallel_copy_dest(nir_parallel_copy_instr *instr,
                         nir_foreach_dest_cb cb, void *state)
{
   nir_foreach_parallel_copy_entry(entry, instr) {
      if (!cb(&entry->dest, state))
         return false;
   }

   return true;
}

bool
nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return visit_alu_dest(nir_instr_as_alu(instr), cb, state);
   case nir_instr_type_deref:
      return visit_deref_dest(nir_instr_as_deref(instr), cb, state);
   case nir_instr_type_intrinsic:
      return visit_intrinsic_dest(nir_instr_as_intrinsic(instr), cb, state);
   case nir_instr_type_tex:
      return visit_texture_dest(nir_instr_as_tex(instr), cb, state);
   case nir_instr_type_phi:
      return visit_phi_dest(nir_instr_as_phi(instr), cb, state);
   case nir_instr_type_parallel_copy:
      return visit_parallel_copy_dest(nir_instr_as_parallel_copy(instr),
                                      cb, state);

   case nir_instr_type_load_const:
   case nir_instr_type_ssa_undef:
   case nir_instr_type_call:
   case nir_instr_type_jump:
      break;

   default:
      unreachable("Invalid instruction type");
      break;
   }

   return true;
}

struct foreach_ssa_def_state {
   nir_foreach_ssa_def_cb cb;
   void *client_state;
};

static inline bool
nir_ssa_def_visitor(nir_dest *dest, void *void_state)
{
   struct foreach_ssa_def_state *state = void_state;

   if (dest->is_ssa)
      return state->cb(&dest->ssa, state->client_state);
   else
      return true;
}

bool
nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state)
{
   switch (instr->type) {
   case nir_instr_type_alu:
   case nir_instr_type_deref:
   case nir_instr_type_tex:
   case nir_instr_type_intrinsic:
   case nir_instr_type_phi:
   case nir_instr_type_parallel_copy: {
      struct foreach_ssa_def_state foreach_state = {cb, state};
      return nir_foreach_dest(instr, nir_ssa_def_visitor, &foreach_state);
   }

   case nir_instr_type_load_const:
      return cb(&nir_instr_as_load_const(instr)->def, state);
   case nir_instr_type_ssa_undef:
      return cb(&nir_instr_as_ssa_undef(instr)->def, state);
   case nir_instr_type_call:
   case nir_instr_type_jump:
      return true;
   default:
      unreachable("Invalid instruction type");
   }
}

static bool
visit_src(nir_src *src, nir_foreach_src_cb cb, void *state)
{
   if (!cb(src, state))
      return false;
   if (!src->is_ssa && src->reg.indirect)
      return cb(src->reg.indirect, state);
   return true;
}

static bool
visit_alu_src(nir_alu_instr *instr, nir_foreach_src_cb cb, void *state)
{
   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
      if (!visit_src(&instr->src[i].src, cb, state))
         return false;

   return true;
}

static bool
visit_deref_instr_src(nir_deref_instr *instr,
                      nir_foreach_src_cb cb, void *state)
{
   if (instr->deref_type != nir_deref_type_var) {
      if (!visit_src(&instr->parent, cb, state))
         return false;
   }

   if (instr->deref_type == nir_deref_type_array) {
      if (!visit_src(&instr->arr.index, cb, state))
         return false;
   }

   return true;
}

static bool
visit_tex_src(nir_tex_instr *instr, nir_foreach_src_cb cb, void *state)
{
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      if (!visit_src(&instr->src[i].src, cb, state))
         return false;
   }

   return true;
}

static bool
visit_intrinsic_src(nir_intrinsic_instr *instr, nir_foreach_src_cb cb,
                    void *state)
{
   unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
   for (unsigned i = 0; i < num_srcs; i++) {
      if (!visit_src(&instr->src[i], cb, state))
         return false;
   }

   return true;
}

static bool
visit_call_src(nir_call_instr *instr, nir_foreach_src_cb cb, void *state)
{
   for (unsigned i = 0; i < instr->num_params; i++) {
      if (!visit_src(&instr->params[i], cb, state))
         return false;
   }

   return true;
}

static bool
visit_phi_src(nir_phi_instr *instr, nir_foreach_src_cb cb, void *state)
{
   nir_foreach_phi_src(src, instr) {
      if (!visit_src(&src->src, cb, state))
         return false;
   }

   return true;
}

static bool
visit_parallel_copy_src(nir_parallel_copy_instr *instr,
                        nir_foreach_src_cb cb, void *state)
{
   nir_foreach_parallel_copy_entry(entry, instr) {
      if (!visit_src(&entry->src, cb, state))
         return false;
   }

   return true;
}

typedef struct {
   void *state;
   nir_foreach_src_cb cb;
} visit_dest_indirect_state;

static bool
visit_dest_indirect(nir_dest *dest, void *_state)
{
   visit_dest_indirect_state *state = (visit_dest_indirect_state *) _state;

   if (!dest->is_ssa && dest->reg.indirect)
      return state->cb(dest->reg.indirect, state->state);

   return true;
}

bool
nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      if (!visit_alu_src(nir_instr_as_alu(instr), cb, state))
         return false;
      break;
   case nir_instr_type_deref:
      if (!visit_deref_instr_src(nir_instr_as_deref(instr), cb, state))
         return false;
      break;
   case nir_instr_type_intrinsic:
      if (!visit_intrinsic_src(nir_instr_as_intrinsic(instr), cb, state))
         return false;
      break;
   case nir_instr_type_tex:
      if (!visit_tex_src(nir_instr_as_tex(instr), cb, state))
         return false;
      break;
   case nir_instr_type_call:
      if (!visit_call_src(nir_instr_as_call(instr), cb, state))
         return false;
      break;
   case nir_instr_type_load_const:
      /* Constant load instructions have no regular sources */
      break;
   case nir_instr_type_phi:
      if (!visit_phi_src(nir_instr_as_phi(instr), cb, state))
         return false;
      break;
   case nir_instr_type_parallel_copy:
      if (!visit_parallel_copy_src(nir_instr_as_parallel_copy(instr),
                                   cb, state))
         return false;
      break;
   case nir_instr_type_jump:
   case nir_instr_type_ssa_undef:
      return true;

   default:
      unreachable("Invalid instruction type");
      break;
   }

   visit_dest_indirect_state dest_state;
   dest_state.state = state;
   dest_state.cb = cb;
   return nir_foreach_dest(instr, visit_dest_indirect, &dest_state);
}

int64_t
nir_src_comp_as_int(nir_src src, unsigned comp)
{
   assert(nir_src_is_const(src));
   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   assert(comp < load->def.num_components);
   switch (load->def.bit_size) {
   /* int1_t uses 0/-1 convention */
   case 1:  return -(int)load->value.b[comp];
   case 8:  return load->value.i8[comp];
   case 16: return load->value.i16[comp];
   case 32: return load->value.i32[comp];
   case 64: return load->value.i64[comp];
   default:
      unreachable("Invalid bit size");
   }
}

uint64_t
nir_src_comp_as_uint(nir_src src, unsigned comp)
{
   assert(nir_src_is_const(src));
   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   assert(comp < load->def.num_components);
   switch (load->def.bit_size) {
   case 1:  return load->value.b[comp];
   case 8:  return load->value.u8[comp];
   case 16: return load->value.u16[comp];
   case 32: return load->value.u32[comp];
   case 64: return load->value.u64[comp];
   default:
      unreachable("Invalid bit size");
   }
}

bool
nir_src_comp_as_bool(nir_src src, unsigned comp)
{
   int64_t i = nir_src_comp_as_int(src, comp);

   /* Booleans of any size use 0/-1 convention */
   assert(i == 0 || i == -1);

   return i;
}

double
nir_src_comp_as_float(nir_src src, unsigned comp)
{
   assert(nir_src_is_const(src));
   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   assert(comp < load->def.num_components);
   switch (load->def.bit_size) {
   case 16: return _mesa_half_to_float(load->value.u16[comp]);
   case 32: return load->value.f32[comp];
   case 64: return load->value.f64[comp];
   default:
      unreachable("Invalid bit size");
   }
}

int64_t
nir_src_as_int(nir_src src)
{
   assert(nir_src_num_components(src) == 1);
   return nir_src_comp_as_int(src, 0);
}

uint64_t
nir_src_as_uint(nir_src src)
{
   assert(nir_src_num_components(src) == 1);
   return nir_src_comp_as_uint(src, 0);
}

bool
nir_src_as_bool(nir_src src)
{
   assert(nir_src_num_components(src) == 1);
   return nir_src_comp_as_bool(src, 0);
}

double
nir_src_as_float(nir_src src)
{
   assert(nir_src_num_components(src) == 1);
   return nir_src_comp_as_float(src, 0);
}
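
/* Example (illustrative): a pass that special-cases a constant source,
 * here hypothetically the second source of an intrinsic, would write:
 *
 *    if (nir_src_is_const(intrin->src[1])) {
 *       uint64_t offset = nir_src_as_uint(intrin->src[1]);
 *       ...
 *    }
 *
 * The helpers above assert on non-constant sources, so the
 * nir_src_is_const() check is required.
 */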

nir_const_value *
nir_src_as_const_value(nir_src src)
{
   if (!src.is_ssa)
      return NULL;

   if (src.ssa->parent_instr->type != nir_instr_type_load_const)
      return NULL;

   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   return &load->value;
}

/**
 * Returns true if the source is known to be dynamically uniform. Otherwise
 * it returns false, which means the source may or may not be dynamically
 * uniform; it simply cannot be proven here.
 */
bool
nir_src_is_dynamically_uniform(nir_src src)
{
   if (!src.is_ssa)
      return false;

   /* Constants are trivially dynamically uniform */
   if (src.ssa->parent_instr->type == nir_instr_type_load_const)
      return true;

   /* As are uniform variables */
   if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr);

      if (intr->intrinsic == nir_intrinsic_load_uniform)
         return true;
   }

   /* XXX: this could have many more tests, such as when a sampler function is
    * called with dynamically uniform arguments.
    */
   return false;
}

static void
src_remove_all_uses(nir_src *src)
{
   for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
      if (!src_is_valid(src))
         continue;

      list_del(&src->use_link);
   }
}

static void
src_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
{
   for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
      if (!src_is_valid(src))
         continue;

      if (parent_instr) {
         src->parent_instr = parent_instr;
         if (src->is_ssa)
            list_addtail(&src->use_link, &src->ssa->uses);
         else
            list_addtail(&src->use_link, &src->reg.reg->uses);
      } else {
         assert(parent_if);
         src->parent_if = parent_if;
         if (src->is_ssa)
            list_addtail(&src->use_link, &src->ssa->if_uses);
         else
            list_addtail(&src->use_link, &src->reg.reg->if_uses);
      }
   }
}

void
nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src)
{
   assert(!src_is_valid(src) || src->parent_instr == instr);

   src_remove_all_uses(src);
   *src = new_src;
   src_add_all_uses(src, instr, NULL);
}

void
nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src)
{
   assert(!src_is_valid(dest) || dest->parent_instr == dest_instr);

   src_remove_all_uses(dest);
   src_remove_all_uses(src);
   *dest = *src;
   *src = NIR_SRC_INIT;
   src_add_all_uses(dest, dest_instr, NULL);
}

void
nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src)
{
   nir_src *src = &if_stmt->condition;
   assert(!src_is_valid(src) || src->parent_if == if_stmt);

   src_remove_all_uses(src);
   *src = new_src;
   src_add_all_uses(src, NULL, if_stmt);
}

void
nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest)
{
   if (dest->is_ssa) {
      /* We can only overwrite an SSA destination if it has no uses. */
      assert(list_empty(&dest->ssa.uses) && list_empty(&dest->ssa.if_uses));
   } else {
      list_del(&dest->reg.def_link);
      if (dest->reg.indirect)
         src_remove_all_uses(dest->reg.indirect);
   }

   /* We can't re-write with an SSA def */
   assert(!new_dest.is_ssa);

   nir_dest_copy(dest, &new_dest, instr);

   dest->reg.parent_instr = instr;
   list_addtail(&dest->reg.def_link, &new_dest.reg.reg->defs);

   if (dest->reg.indirect)
      src_add_all_uses(dest->reg.indirect, instr, NULL);
}

/* note: does *not* take ownership of 'name' */
void
nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
                 unsigned num_components,
                 unsigned bit_size, const char *name)
{
   def->name = ralloc_strdup(instr, name);
   def->parent_instr = instr;
   list_inithead(&def->uses);
   list_inithead(&def->if_uses);
   def->num_components = num_components;
   def->bit_size = bit_size;

   if (instr->block) {
      nir_function_impl *impl =
         nir_cf_node_get_function(&instr->block->cf_node);

      def->index = impl->ssa_alloc++;
   } else {
      def->index = UINT_MAX;
   }
}

/* note: does *not* take ownership of 'name' */
void
nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
                  unsigned num_components, unsigned bit_size,
                  const char *name)
{
   dest->is_ssa = true;
   nir_ssa_def_init(instr, &dest->ssa, num_components, bit_size, name);
}

void
nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src)
{
   assert(!new_src.is_ssa || def != new_src.ssa);

   nir_foreach_use_safe(use_src, def)
      nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);

   nir_foreach_if_use_safe(use_src, def)
      nir_if_rewrite_condition(use_src->parent_if, new_src);
}
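
/* Example (illustrative): the canonical replace-and-remove pattern in an
 * optimization pass, assuming new_def computes the same value as the old
 * instruction's destination:
 *
 *    nir_ssa_def_rewrite_uses(&old_instr->dest.ssa,
 *                             nir_src_for_ssa(new_def));
 *    nir_instr_remove(&old_instr->instr);
 */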

static bool
is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between)
{
   assert(start->block == end->block);

   if (between->block != start->block)
      return false;

   /* Search backwards looking for "between" */
   while (start != end) {
      if (between == end)
         return true;

      end = nir_instr_prev(end);
      assert(end);
   }

   return false;
}

/* Replaces all uses of the given SSA def with the given source, but only if
 * the use comes after the after_me instruction. This is useful when emitting
 * code to fix up the result of some instruction: the fixup code may freely
 * use the original result, and then a single call to rewrite_uses_after with
 * the last fixup instruction as after_me replaces all the remaining uses
 * without touching the fixup code itself.
 *
 * This function assumes that after_me is in the same block as
 * def->parent_instr and that after_me comes after def->parent_instr.
 */
void
nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src,
                               nir_instr *after_me)
{
   assert(!new_src.is_ssa || def != new_src.ssa);

   nir_foreach_use_safe(use_src, def) {
      assert(use_src->parent_instr != def->parent_instr);
      /* Since def already dominates all of its uses, the only way a use can
       * not be dominated by after_me is if it is between def and after_me in
       * the instruction list.
       */
      if (!is_instr_between(def->parent_instr, after_me, use_src->parent_instr))
         nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
   }

   nir_foreach_if_use_safe(use_src, def)
      nir_if_rewrite_condition(use_src->parent_if, new_src);
}

nir_component_mask_t
nir_ssa_def_components_read(const nir_ssa_def *def)
{
   nir_component_mask_t read_mask = 0;
   nir_foreach_use(use, def) {
      if (use->parent_instr->type == nir_instr_type_alu) {
         nir_alu_instr *alu = nir_instr_as_alu(use->parent_instr);
         nir_alu_src *alu_src = exec_node_data(nir_alu_src, use, src);
         int src_idx = alu_src - &alu->src[0];
         assert(src_idx >= 0 && src_idx < nir_op_infos[alu->op].num_inputs);
         read_mask |= nir_alu_instr_src_read_mask(alu, src_idx);
      } else {
         return (1 << def->num_components) - 1;
      }
   }

   if (!list_empty(&def->if_uses))
      read_mask |= 1;

   return read_mask;
}

nir_block *
nir_block_cf_tree_next(nir_block *block)
{
   if (block == NULL) {
      /* nir_foreach_block_safe() will call this function on a NULL block
       * after the last iteration, but it won't use the result so just return
       * NULL here.
       */
      return NULL;
   }

   nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
   if (cf_next)
      return nir_cf_node_cf_tree_first(cf_next);

   nir_cf_node *parent = block->cf_node.parent;

   switch (parent->type) {
   case nir_cf_node_if: {
      /* Are we at the end of the if? Go to the beginning of the else */
      nir_if *if_stmt = nir_cf_node_as_if(parent);
      if (block == nir_if_last_then_block(if_stmt))
         return nir_if_first_else_block(if_stmt);

      assert(block == nir_if_last_else_block(if_stmt));
      /* fall through */
   }

   case nir_cf_node_loop:
      return nir_cf_node_as_block(nir_cf_node_next(parent));

   case nir_cf_node_function:
      return NULL;

   default:
      unreachable("unknown cf node type");
   }
}

nir_block *
nir_block_cf_tree_prev(nir_block *block)
{
   if (block == NULL) {
      /* do this for consistency with nir_block_cf_tree_next() */
      return NULL;
   }

   nir_cf_node *cf_prev = nir_cf_node_prev(&block->cf_node);
   if (cf_prev)
      return nir_cf_node_cf_tree_last(cf_prev);

   nir_cf_node *parent = block->cf_node.parent;

   switch (parent->type) {
   case nir_cf_node_if: {
      /* Are we at the beginning of the else? Go to the end of the if */
      nir_if *if_stmt = nir_cf_node_as_if(parent);
      if (block == nir_if_first_else_block(if_stmt))
         return nir_if_last_then_block(if_stmt);

      assert(block == nir_if_first_then_block(if_stmt));
      /* fall through */
   }

   case nir_cf_node_loop:
      return nir_cf_node_as_block(nir_cf_node_prev(parent));

   case nir_cf_node_function:
      return NULL;

   default:
      unreachable("unknown cf node type");
   }
}

nir_block *nir_cf_node_cf_tree_first(nir_cf_node *node)
{
   switch (node->type) {
   case nir_cf_node_function: {
      nir_function_impl *impl = nir_cf_node_as_function(node);
      return nir_start_block(impl);
   }

   case nir_cf_node_if: {
      nir_if *if_stmt = nir_cf_node_as_if(node);
      return nir_if_first_then_block(if_stmt);
   }

   case nir_cf_node_loop: {
      nir_loop *loop = nir_cf_node_as_loop(node);
      return nir_loop_first_block(loop);
   }

   case nir_cf_node_block: {
      return nir_cf_node_as_block(node);
   }

   default:
      unreachable("unknown node type");
   }
}

nir_block *nir_cf_node_cf_tree_last(nir_cf_node *node)
{
   switch (node->type) {
   case nir_cf_node_function: {
      nir_function_impl *impl = nir_cf_node_as_function(node);
      return nir_impl_last_block(impl);
   }

   case nir_cf_node_if: {
      nir_if *if_stmt = nir_cf_node_as_if(node);
      return nir_if_last_else_block(if_stmt);
   }

   case nir_cf_node_loop: {
      nir_loop *loop = nir_cf_node_as_loop(node);
      return nir_loop_last_block(loop);
   }

   case nir_cf_node_block: {
      return nir_cf_node_as_block(node);
   }

   default:
      unreachable("unknown node type");
   }
}

nir_block *nir_cf_node_cf_tree_next(nir_cf_node *node)
{
   if (node->type == nir_cf_node_block)
      return nir_block_cf_tree_next(nir_cf_node_as_block(node));
   else if (node->type == nir_cf_node_function)
      return NULL;
   else
      return nir_cf_node_as_block(nir_cf_node_next(node));
}

nir_if *
nir_block_get_following_if(nir_block *block)
{
   if (exec_node_is_tail_sentinel(&block->cf_node.node))
      return NULL;

   if (nir_cf_node_is_last(&block->cf_node))
      return NULL;

   nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);

   if (next_node->type != nir_cf_node_if)
      return NULL;

   return nir_cf_node_as_if(next_node);
}

nir_loop *
nir_block_get_following_loop(nir_block *block)
{
   if (exec_node_is_tail_sentinel(&block->cf_node.node))
      return NULL;

   if (nir_cf_node_is_last(&block->cf_node))
      return NULL;

   nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);

   if (next_node->type != nir_cf_node_loop)
      return NULL;

   return nir_cf_node_as_loop(next_node);
}

void
nir_index_blocks(nir_function_impl *impl)
{
   unsigned index = 0;

   if (impl->valid_metadata & nir_metadata_block_index)
      return;

   nir_foreach_block(block, impl) {
      block->index = index++;
   }

   /* The end_block isn't really part of the program, which is why its index
    * is >= num_blocks.
    */
   impl->num_blocks = impl->end_block->index = index;
}

static bool
index_ssa_def_cb(nir_ssa_def *def, void *state)
{
   unsigned *index = (unsigned *) state;
   def->index = (*index)++;

   return true;
}

/**
 * The indices are applied top-to-bottom which has the very nice property
 * that, if A dominates B, then A->index <= B->index.
 */
void
nir_index_ssa_defs(nir_function_impl *impl)
{
   unsigned index = 0;

   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block)
         nir_foreach_ssa_def(instr, index_ssa_def_cb, &index);
   }

   impl->ssa_alloc = index;
}

/**
 * The indices are applied top-to-bottom which has the very nice property
 * that, if A dominates B, then A->index <= B->index.
 */
unsigned
nir_index_instrs(nir_function_impl *impl)
{
   unsigned index = 0;

   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block)
         instr->index = index++;
   }

   return index;
}

nir_intrinsic_op
nir_intrinsic_from_system_value(gl_system_value val)
{
   switch (val) {
   case SYSTEM_VALUE_VERTEX_ID:
      return nir_intrinsic_load_vertex_id;
   case SYSTEM_VALUE_INSTANCE_ID:
      return nir_intrinsic_load_instance_id;
   case SYSTEM_VALUE_DRAW_ID:
      return nir_intrinsic_load_draw_id;
   case SYSTEM_VALUE_BASE_INSTANCE:
      return nir_intrinsic_load_base_instance;
   case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
      return nir_intrinsic_load_vertex_id_zero_base;
   case SYSTEM_VALUE_IS_INDEXED_DRAW:
      return nir_intrinsic_load_is_indexed_draw;
   case SYSTEM_VALUE_FIRST_VERTEX:
      return nir_intrinsic_load_first_vertex;
   case SYSTEM_VALUE_BASE_VERTEX:
      return nir_intrinsic_load_base_vertex;
   case SYSTEM_VALUE_INVOCATION_ID:
      return nir_intrinsic_load_invocation_id;
   case SYSTEM_VALUE_FRAG_COORD:
      return nir_intrinsic_load_frag_coord;
   case SYSTEM_VALUE_FRONT_FACE:
      return nir_intrinsic_load_front_face;
   case SYSTEM_VALUE_SAMPLE_ID:
      return nir_intrinsic_load_sample_id;
   case SYSTEM_VALUE_SAMPLE_POS:
      return nir_intrinsic_load_sample_pos;
   case SYSTEM_VALUE_SAMPLE_MASK_IN:
      return nir_intrinsic_load_sample_mask_in;
   case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
      return nir_intrinsic_load_local_invocation_id;
   case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
      return nir_intrinsic_load_local_invocation_index;
   case SYSTEM_VALUE_WORK_GROUP_ID:
      return nir_intrinsic_load_work_group_id;
   case SYSTEM_VALUE_NUM_WORK_GROUPS:
      return nir_intrinsic_load_num_work_groups;
   case SYSTEM_VALUE_PRIMITIVE_ID:
      return nir_intrinsic_load_primitive_id;
   case SYSTEM_VALUE_TESS_COORD:
      return nir_intrinsic_load_tess_coord;
   case SYSTEM_VALUE_TESS_LEVEL_OUTER:
      return nir_intrinsic_load_tess_level_outer;
   case SYSTEM_VALUE_TESS_LEVEL_INNER:
      return nir_intrinsic_load_tess_level_inner;
   case SYSTEM_VALUE_VERTICES_IN:
      return nir_intrinsic_load_patch_vertices_in;
   case SYSTEM_VALUE_HELPER_INVOCATION:
      return nir_intrinsic_load_helper_invocation;
   case SYSTEM_VALUE_VIEW_INDEX:
      return nir_intrinsic_load_view_index;
   case SYSTEM_VALUE_SUBGROUP_SIZE:
      return nir_intrinsic_load_subgroup_size;
   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
      return nir_intrinsic_load_subgroup_invocation;
   case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
      return nir_intrinsic_load_subgroup_eq_mask;
   case SYSTEM_VALUE_SUBGROUP_GE_MASK:
      return nir_intrinsic_load_subgroup_ge_mask;
   case SYSTEM_VALUE_SUBGROUP_GT_MASK:
      return nir_intrinsic_load_subgroup_gt_mask;
   case SYSTEM_VALUE_SUBGROUP_LE_MASK:
      return nir_intrinsic_load_subgroup_le_mask;
   case SYSTEM_VALUE_SUBGROUP_LT_MASK:
      return nir_intrinsic_load_subgroup_lt_mask;
   case SYSTEM_VALUE_NUM_SUBGROUPS:
      return nir_intrinsic_load_num_subgroups;
   case SYSTEM_VALUE_SUBGROUP_ID:
      return nir_intrinsic_load_subgroup_id;
   case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
      return nir_intrinsic_load_local_group_size;
   case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
      return nir_intrinsic_load_global_invocation_id;
   case SYSTEM_VALUE_WORK_DIM:
      return nir_intrinsic_load_work_dim;
   default:
      unreachable("system value does not directly correspond to intrinsic");
   }
}

gl_system_value
nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
{
   switch (intrin) {
   case nir_intrinsic_load_vertex_id:
      return SYSTEM_VALUE_VERTEX_ID;
   case nir_intrinsic_load_instance_id:
      return SYSTEM_VALUE_INSTANCE_ID;
   case nir_intrinsic_load_draw_id:
      return SYSTEM_VALUE_DRAW_ID;
   case nir_intrinsic_load_base_instance:
      return SYSTEM_VALUE_BASE_INSTANCE;
   case nir_intrinsic_load_vertex_id_zero_base:
      return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
   case nir_intrinsic_load_first_vertex:
      return SYSTEM_VALUE_FIRST_VERTEX;
   case nir_intrinsic_load_is_indexed_draw:
      return SYSTEM_VALUE_IS_INDEXED_DRAW;
   case nir_intrinsic_load_base_vertex:
      return SYSTEM_VALUE_BASE_VERTEX;
   case nir_intrinsic_load_invocation_id:
      return SYSTEM_VALUE_INVOCATION_ID;
   case nir_intrinsic_load_frag_coord:
      return SYSTEM_VALUE_FRAG_COORD;
   case nir_intrinsic_load_front_face:
      return SYSTEM_VALUE_FRONT_FACE;
   case nir_intrinsic_load_sample_id:
      return SYSTEM_VALUE_SAMPLE_ID;
   case nir_intrinsic_load_sample_pos:
      return SYSTEM_VALUE_SAMPLE_POS;
   case nir_intrinsic_load_sample_mask_in:
      return SYSTEM_VALUE_SAMPLE_MASK_IN;
   case nir_intrinsic_load_local_invocation_id:
      return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
   case nir_intrinsic_load_local_invocation_index:
      return SYSTEM_VALUE_LOCAL_INVOCATION_INDEX;
   case nir_intrinsic_load_num_work_groups:
      return SYSTEM_VALUE_NUM_WORK_GROUPS;
   case nir_intrinsic_load_work_group_id:
      return SYSTEM_VALUE_WORK_GROUP_ID;
   case nir_intrinsic_load_primitive_id:
      return SYSTEM_VALUE_PRIMITIVE_ID;
   case nir_intrinsic_load_tess_coord:
      return SYSTEM_VALUE_TESS_COORD;
   case nir_intrinsic_load_tess_level_outer:
      return SYSTEM_VALUE_TESS_LEVEL_OUTER;
   case nir_intrinsic_load_tess_level_inner:
      return SYSTEM_VALUE_TESS_LEVEL_INNER;
   case nir_intrinsic_load_patch_vertices_in:
      return SYSTEM_VALUE_VERTICES_IN;
   case nir_intrinsic_load_helper_invocation:
      return SYSTEM_VALUE_HELPER_INVOCATION;
   case nir_intrinsic_load_view_index:
      return SYSTEM_VALUE_VIEW_INDEX;
   case nir_intrinsic_load_subgroup_size:
      return SYSTEM_VALUE_SUBGROUP_SIZE;
   case nir_intrinsic_load_subgroup_invocation:
      return SYSTEM_VALUE_SUBGROUP_INVOCATION;
   case nir_intrinsic_load_subgroup_eq_mask:
      return SYSTEM_VALUE_SUBGROUP_EQ_MASK;
   case nir_intrinsic_load_subgroup_ge_mask:
      return SYSTEM_VALUE_SUBGROUP_GE_MASK;
   case nir_intrinsic_load_subgroup_gt_mask:
      return SYSTEM_VALUE_SUBGROUP_GT_MASK;
   case nir_intrinsic_load_subgroup_le_mask:
      return SYSTEM_VALUE_SUBGROUP_LE_MASK;
   case nir_intrinsic_load_subgroup_lt_mask:
      return SYSTEM_VALUE_SUBGROUP_LT_MASK;
   case nir_intrinsic_load_num_subgroups:
      return SYSTEM_VALUE_NUM_SUBGROUPS;
   case nir_intrinsic_load_subgroup_id:
      return SYSTEM_VALUE_SUBGROUP_ID;
   case nir_intrinsic_load_local_group_size:
      return SYSTEM_VALUE_LOCAL_GROUP_SIZE;
   case nir_intrinsic_load_global_invocation_id:
      return SYSTEM_VALUE_GLOBAL_INVOCATION_ID;
   default:
      unreachable("intrinsic doesn't produce a system value");
   }
}

/* OpenGL utility method that remaps the location attributes if they are
 * doubles. Not needed for Vulkan due to the differences in input location
 * counting for doubles between Vulkan and OpenGL.
 *
 * The bitfield returned in dual_slot is one bit for each double input slot in
 * the original OpenGL single-slot input numbering. The mapping from old
 * locations to new locations is as follows:
 *
 *    new_loc = loc + util_bitcount(dual_slot & BITFIELD64_MASK(loc))
 */
void
nir_remap_dual_slot_attributes(nir_shader *shader, uint64_t *dual_slot)
{
   assert(shader->info.stage == MESA_SHADER_VERTEX);

   *dual_slot = 0;
   nir_foreach_variable(var, &shader->inputs) {
      if (glsl_type_is_dual_slot(glsl_without_array(var->type))) {
         unsigned slots = glsl_count_attribute_slots(var->type, true);
         *dual_slot |= BITFIELD64_MASK(slots) << var->data.location;
      }
   }

   nir_foreach_variable(var, &shader->inputs) {
      var->data.location +=
         util_bitcount64(*dual_slot & BITFIELD64_MASK(var->data.location));
   }
}
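
/* Worked example: with a dvec4 at location 1 and a float at location 2,
 * the first loop sets dual_slot = 0b10 (bit 1 only, since a lone dvec4
 * counts as a single GL vertex attribute slot here). The dvec4 then stays
 * at location 1 (no dual-slot bits below it), while the float moves to
 * 2 + util_bitcount(0b10 & BITFIELD64_MASK(2)) = 3, leaving room for the
 * double's second slot.
 */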

/* Returns an attribute mask that has been re-compacted using the given
 * dual_slot mask.
 */
uint64_t
nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot)
{
   while (dual_slot) {
      unsigned loc = u_bit_scan64(&dual_slot);
      /* mask of all bits up to and including loc */
      uint64_t mask = BITFIELD64_MASK(loc + 1);
      attribs = (attribs & mask) | ((attribs & ~mask) >> 1);
   }
   return attribs;
}
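
/* Worked example: continuing the case above (dual_slot = 0b10), a mask in
 * dual-slot numbering with locations 1 and 3 set (0b1010) compacts to
 * 0b0110, i.e. locations 1 and 2: every bit above a dual-slot location
 * shifts down by one, undoing the extra slot the double consumed.
 */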