/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *
 */

#include "nir.h"
#include "nir_control_flow_private.h"
#include "util/half_float.h"
#include <limits.h>
#include <assert.h>
#include <math.h>
#include "util/u_math.h"

#include "main/menums.h" /* BITFIELD64_MASK */

nir_shader *
nir_shader_create(void *mem_ctx,
                  gl_shader_stage stage,
                  const nir_shader_compiler_options *options,
                  shader_info *si)
{
   nir_shader *shader = rzalloc(mem_ctx, nir_shader);

   exec_list_make_empty(&shader->uniforms);
   exec_list_make_empty(&shader->inputs);
   exec_list_make_empty(&shader->outputs);
   exec_list_make_empty(&shader->shared);

   shader->options = options;

   if (si) {
      assert(si->stage == stage);
      shader->info = *si;
   } else {
      shader->info.stage = stage;
   }

   exec_list_make_empty(&shader->functions);
   exec_list_make_empty(&shader->registers);
   exec_list_make_empty(&shader->globals);
   exec_list_make_empty(&shader->system_values);
   shader->reg_alloc = 0;

   shader->num_inputs = 0;
   shader->num_outputs = 0;
   shader->num_uniforms = 0;
   shader->num_shared = 0;

   return shader;
}
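
/* A minimal usage sketch (not part of this file): creating a fresh fragment
 * shader with no parent memory context and default shader_info, then giving
 * it an entry point.  The "options" pointer is assumed to come from the
 * driver's compiler options.
 *
 *    nir_shader *s = nir_shader_create(NULL, MESA_SHADER_FRAGMENT,
 *                                      options, NULL);
 *    nir_function *f = nir_function_create(s, "main");
 *    nir_function_impl *impl = nir_function_impl_create(f);
 */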

static nir_register *
reg_create(void *mem_ctx, struct exec_list *list)
{
   nir_register *reg = ralloc(mem_ctx, nir_register);

   list_inithead(&reg->uses);
   list_inithead(&reg->defs);
   list_inithead(&reg->if_uses);

   reg->num_components = 0;
   reg->bit_size = 32;
   reg->num_array_elems = 0;
   reg->is_packed = false;
   reg->name = NULL;

   exec_list_push_tail(list, &reg->node);

   return reg;
}

nir_register *
nir_global_reg_create(nir_shader *shader)
{
   nir_register *reg = reg_create(shader, &shader->registers);
   reg->index = shader->reg_alloc++;
   reg->is_global = true;

   return reg;
}

nir_register *
nir_local_reg_create(nir_function_impl *impl)
{
   nir_register *reg = reg_create(ralloc_parent(impl), &impl->registers);
   reg->index = impl->reg_alloc++;
   reg->is_global = false;

   return reg;
}

void
nir_reg_remove(nir_register *reg)
{
   exec_node_remove(&reg->node);
}

void
nir_shader_add_variable(nir_shader *shader, nir_variable *var)
{
   switch (var->data.mode) {
   case nir_var_all:
      assert(!"invalid mode");
      break;

   case nir_var_local:
      assert(!"nir_shader_add_variable cannot be used for local variables");
      break;

   case nir_var_global:
      exec_list_push_tail(&shader->globals, &var->node);
      break;

   case nir_var_shader_in:
      exec_list_push_tail(&shader->inputs, &var->node);
      break;

   case nir_var_shader_out:
      exec_list_push_tail(&shader->outputs, &var->node);
      break;

   case nir_var_uniform:
   case nir_var_shader_storage:
      exec_list_push_tail(&shader->uniforms, &var->node);
      break;

   case nir_var_shared:
      assert(shader->info.stage == MESA_SHADER_COMPUTE);
      exec_list_push_tail(&shader->shared, &var->node);
      break;

   case nir_var_system_value:
      exec_list_push_tail(&shader->system_values, &var->node);
      break;
   }
}

nir_variable *
nir_variable_create(nir_shader *shader, nir_variable_mode mode,
                    const struct glsl_type *type, const char *name)
{
   nir_variable *var = rzalloc(shader, nir_variable);
   var->name = ralloc_strdup(var, name);
   var->type = type;
   var->data.mode = mode;
   var->data.how_declared = nir_var_declared_normally;

   if ((mode == nir_var_shader_in &&
        shader->info.stage != MESA_SHADER_VERTEX) ||
       (mode == nir_var_shader_out &&
        shader->info.stage != MESA_SHADER_FRAGMENT))
      var->data.interpolation = INTERP_MODE_SMOOTH;

   if (mode == nir_var_shader_in || mode == nir_var_uniform)
      var->data.read_only = true;

   nir_shader_add_variable(shader, var);

   return var;
}

nir_variable *
nir_local_variable_create(nir_function_impl *impl,
                          const struct glsl_type *type, const char *name)
{
   nir_variable *var = rzalloc(impl->function->shader, nir_variable);
   var->name = ralloc_strdup(var, name);
   var->type = type;
   var->data.mode = nir_var_local;

   nir_function_impl_add_variable(impl, var);

   return var;
}

nir_function *
nir_function_create(nir_shader *shader, const char *name)
{
   nir_function *func = ralloc(shader, nir_function);

   exec_list_push_tail(&shader->functions, &func->node);

   func->name = ralloc_strdup(func, name);
   func->shader = shader;
   func->num_params = 0;
   func->params = NULL;
   func->impl = NULL;

   return func;
}

/* NOTE: if the instruction you are copying a src to is already added
 * to the IR, use nir_instr_rewrite_src() instead.
 */
void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx)
{
   dest->is_ssa = src->is_ssa;
   if (src->is_ssa) {
      dest->ssa = src->ssa;
   } else {
      dest->reg.base_offset = src->reg.base_offset;
      dest->reg.reg = src->reg.reg;
      if (src->reg.indirect) {
         dest->reg.indirect = ralloc(mem_ctx, nir_src);
         nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx);
      } else {
         dest->reg.indirect = NULL;
      }
   }
}
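
/* Why the NOTE above matters: nir_src_copy() only copies fields; it does not
 * link the new src into the use list of the SSA def or register it reads.
 * For an instruction that is already in the IR, something like the following
 * sketch keeps the use/def lists consistent ("alu" and "new_def" are
 * hypothetical):
 *
 *    nir_instr_rewrite_src(&alu->instr, &alu->src[0].src,
 *                          nir_src_for_ssa(new_def));
 */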

void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr)
{
   /* Copying an SSA definition makes no sense whatsoever. */
   assert(!src->is_ssa);

   dest->is_ssa = false;

   dest->reg.base_offset = src->reg.base_offset;
   dest->reg.reg = src->reg.reg;
   if (src->reg.indirect) {
      dest->reg.indirect = ralloc(instr, nir_src);
      nir_src_copy(dest->reg.indirect, src->reg.indirect, instr);
   } else {
      dest->reg.indirect = NULL;
   }
}

void
nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
                 nir_alu_instr *instr)
{
   nir_src_copy(&dest->src, &src->src, &instr->instr);
   dest->abs = src->abs;
   dest->negate = src->negate;
   for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
      dest->swizzle[i] = src->swizzle[i];
}

void
nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
                  nir_alu_instr *instr)
{
   nir_dest_copy(&dest->dest, &src->dest, &instr->instr);
   dest->write_mask = src->write_mask;
   dest->saturate = src->saturate;
}


static void
cf_init(nir_cf_node *node, nir_cf_node_type type)
{
   exec_node_init(&node->node);
   node->parent = NULL;
   node->type = type;
}

nir_function_impl *
nir_function_impl_create_bare(nir_shader *shader)
{
   nir_function_impl *impl = ralloc(shader, nir_function_impl);

   impl->function = NULL;

   cf_init(&impl->cf_node, nir_cf_node_function);

   exec_list_make_empty(&impl->body);
   exec_list_make_empty(&impl->registers);
   exec_list_make_empty(&impl->locals);
   impl->reg_alloc = 0;
   impl->ssa_alloc = 0;
   impl->valid_metadata = nir_metadata_none;

   /* create start & end blocks */
   nir_block *start_block = nir_block_create(shader);
   nir_block *end_block = nir_block_create(shader);
   start_block->cf_node.parent = &impl->cf_node;
   end_block->cf_node.parent = &impl->cf_node;
   impl->end_block = end_block;

   exec_list_push_tail(&impl->body, &start_block->cf_node.node);

   start_block->successors[0] = end_block;
   _mesa_set_add(end_block->predecessors, start_block);
   return impl;
}

nir_function_impl *
nir_function_impl_create(nir_function *function)
{
   assert(function->impl == NULL);

   nir_function_impl *impl = nir_function_impl_create_bare(function->shader);

   function->impl = impl;
   impl->function = function;

   return impl;
}

nir_block *
nir_block_create(nir_shader *shader)
{
   nir_block *block = rzalloc(shader, nir_block);

   cf_init(&block->cf_node, nir_cf_node_block);

   block->successors[0] = block->successors[1] = NULL;
   block->predecessors = _mesa_set_create(block, _mesa_hash_pointer,
                                          _mesa_key_pointer_equal);
   block->imm_dom = NULL;
   /* XXX maybe it would be worth it to defer allocation?  This way it
    * doesn't get allocated for shader refs that never run
    * nir_calc_dominance?  For example, the state tracker creates an
    * initial IR, clones it, runs the appropriate lowering passes, and
    * passes it to the driver, which does common lowering/opt and then
    * stores a ref that is later used to do state-specific lowering and
    * further opt.  Do any of the references not need dominance metadata?
    */
   block->dom_frontier = _mesa_set_create(block, _mesa_hash_pointer,
                                          _mesa_key_pointer_equal);

   exec_list_make_empty(&block->instr_list);

   return block;
}

static inline void
src_init(nir_src *src)
{
   src->is_ssa = false;
   src->reg.reg = NULL;
   src->reg.indirect = NULL;
   src->reg.base_offset = 0;
}

nir_if *
nir_if_create(nir_shader *shader)
{
   nir_if *if_stmt = ralloc(shader, nir_if);

   cf_init(&if_stmt->cf_node, nir_cf_node_if);
   src_init(&if_stmt->condition);

   nir_block *then = nir_block_create(shader);
   exec_list_make_empty(&if_stmt->then_list);
   exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node);
   then->cf_node.parent = &if_stmt->cf_node;

   nir_block *else_stmt = nir_block_create(shader);
   exec_list_make_empty(&if_stmt->else_list);
   exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node);
   else_stmt->cf_node.parent = &if_stmt->cf_node;

   return if_stmt;
}

nir_loop *
nir_loop_create(nir_shader *shader)
{
   nir_loop *loop = rzalloc(shader, nir_loop);

   cf_init(&loop->cf_node, nir_cf_node_loop);

   nir_block *body = nir_block_create(shader);
   exec_list_make_empty(&loop->body);
   exec_list_push_tail(&loop->body, &body->cf_node.node);
   body->cf_node.parent = &loop->cf_node;

   body->successors[0] = body;
   _mesa_set_add(body->predecessors, body);

   return loop;
}

static void
instr_init(nir_instr *instr, nir_instr_type type)
{
   instr->type = type;
   instr->block = NULL;
   exec_node_init(&instr->node);
}

static void
dest_init(nir_dest *dest)
{
   dest->is_ssa = false;
   dest->reg.reg = NULL;
   dest->reg.indirect = NULL;
   dest->reg.base_offset = 0;
}

static void
alu_dest_init(nir_alu_dest *dest)
{
   dest_init(&dest->dest);
   dest->saturate = false;
   dest->write_mask = 0xf;
}

static void
alu_src_init(nir_alu_src *src)
{
   src_init(&src->src);
   src->abs = src->negate = false;
   for (int i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i)
      src->swizzle[i] = i;
}

nir_alu_instr *
nir_alu_instr_create(nir_shader *shader, nir_op op)
{
   unsigned num_srcs = nir_op_infos[op].num_inputs;
   /* TODO: don't use rzalloc */
   nir_alu_instr *instr =
      rzalloc_size(shader,
                   sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src));

   instr_init(&instr->instr, nir_instr_type_alu);
   instr->op = op;
   alu_dest_init(&instr->dest);
   for (unsigned i = 0; i < num_srcs; i++)
      alu_src_init(&instr->src[i]);

   return instr;
}

nir_deref_instr *
nir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type)
{
   nir_deref_instr *instr =
      rzalloc_size(shader, sizeof(nir_deref_instr));

   instr_init(&instr->instr, nir_instr_type_deref);

   instr->deref_type = deref_type;
   if (deref_type != nir_deref_type_var)
      src_init(&instr->parent);

   if (deref_type == nir_deref_type_array)
      src_init(&instr->arr.index);

   dest_init(&instr->dest);

   return instr;
}

nir_jump_instr *
nir_jump_instr_create(nir_shader *shader, nir_jump_type type)
{
   nir_jump_instr *instr = ralloc(shader, nir_jump_instr);
   instr_init(&instr->instr, nir_instr_type_jump);
   instr->type = type;
   return instr;
}

nir_load_const_instr *
nir_load_const_instr_create(nir_shader *shader, unsigned num_components,
                            unsigned bit_size)
{
   nir_load_const_instr *instr = rzalloc(shader, nir_load_const_instr);
   instr_init(&instr->instr, nir_instr_type_load_const);

   nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size, NULL);

   return instr;
}

nir_intrinsic_instr *
nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op)
{
   unsigned num_srcs = nir_intrinsic_infos[op].num_srcs;
   /* TODO: don't use rzalloc */
   nir_intrinsic_instr *instr =
      rzalloc_size(shader,
                   sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src));

   instr_init(&instr->instr, nir_instr_type_intrinsic);
   instr->intrinsic = op;

   if (nir_intrinsic_infos[op].has_dest)
      dest_init(&instr->dest);

   for (unsigned i = 0; i < num_srcs; i++)
      src_init(&instr->src[i]);

   return instr;
}

nir_call_instr *
nir_call_instr_create(nir_shader *shader, nir_function *callee)
{
   const unsigned num_params = callee->num_params;
   nir_call_instr *instr =
      rzalloc_size(shader, sizeof(*instr) +
                   num_params * sizeof(instr->params[0]));

   instr_init(&instr->instr, nir_instr_type_call);
   instr->callee = callee;
   instr->num_params = num_params;
   for (unsigned i = 0; i < num_params; i++)
      src_init(&instr->params[i]);

   return instr;
}

nir_tex_instr *
nir_tex_instr_create(nir_shader *shader, unsigned num_srcs)
{
   nir_tex_instr *instr = rzalloc(shader, nir_tex_instr);
   instr_init(&instr->instr, nir_instr_type_tex);

   dest_init(&instr->dest);

   instr->num_srcs = num_srcs;
   instr->src = ralloc_array(instr, nir_tex_src, num_srcs);
   for (unsigned i = 0; i < num_srcs; i++)
      src_init(&instr->src[i].src);

   instr->texture_index = 0;
   instr->texture_array_size = 0;
   instr->sampler_index = 0;

   return instr;
}

void
nir_tex_instr_add_src(nir_tex_instr *tex,
                      nir_tex_src_type src_type,
                      nir_src src)
{
   nir_tex_src *new_srcs = rzalloc_array(tex, nir_tex_src,
                                         tex->num_srcs + 1);

   for (unsigned i = 0; i < tex->num_srcs; i++) {
      new_srcs[i].src_type = tex->src[i].src_type;
      nir_instr_move_src(&tex->instr, &new_srcs[i].src,
                         &tex->src[i].src);
   }

   ralloc_free(tex->src);
   tex->src = new_srcs;

   tex->src[tex->num_srcs].src_type = src_type;
   nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs].src, src);
   tex->num_srcs++;
}

void
nir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx)
{
   assert(src_idx < tex->num_srcs);

   /* First rewrite the source to NIR_SRC_INIT */
   nir_instr_rewrite_src(&tex->instr, &tex->src[src_idx].src, NIR_SRC_INIT);

   /* Now, move all of the other sources down */
   for (unsigned i = src_idx + 1; i < tex->num_srcs; i++) {
      tex->src[i-1].src_type = tex->src[i].src_type;
      nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src);
   }
   tex->num_srcs--;
}

nir_phi_instr *
nir_phi_instr_create(nir_shader *shader)
{
   nir_phi_instr *instr = ralloc(shader, nir_phi_instr);
   instr_init(&instr->instr, nir_instr_type_phi);

   dest_init(&instr->dest);
   exec_list_make_empty(&instr->srcs);
   return instr;
}

nir_parallel_copy_instr *
nir_parallel_copy_instr_create(nir_shader *shader)
{
   nir_parallel_copy_instr *instr = ralloc(shader, nir_parallel_copy_instr);
   instr_init(&instr->instr, nir_instr_type_parallel_copy);

   exec_list_make_empty(&instr->entries);

   return instr;
}

nir_ssa_undef_instr *
nir_ssa_undef_instr_create(nir_shader *shader,
                           unsigned num_components,
                           unsigned bit_size)
{
   nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr);
   instr_init(&instr->instr, nir_instr_type_ssa_undef);

   nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size, NULL);

   return instr;
}

static nir_const_value
const_value_float(double d, unsigned bit_size)
{
   nir_const_value v;
   switch (bit_size) {
   case 16: v.u16[0] = _mesa_float_to_half(d); break;
   case 32: v.f32[0] = d; break;
   case 64: v.f64[0] = d; break;
   default:
      unreachable("Invalid bit size");
   }
   return v;
}

static nir_const_value
const_value_int(int64_t i, unsigned bit_size)
{
   nir_const_value v;
   switch (bit_size) {
   case 8:  v.i8[0]  = i; break;
   case 16: v.i16[0] = i; break;
   case 32: v.i32[0] = i; break;
   case 64: v.i64[0] = i; break;
   default:
      unreachable("Invalid bit size");
   }
   return v;
}

nir_const_value
nir_alu_binop_identity(nir_op binop, unsigned bit_size)
{
   const int64_t max_int = (1ull << (bit_size - 1)) - 1;
   const int64_t min_int = -max_int - 1;
   switch (binop) {
   case nir_op_iadd:
      return const_value_int(0, bit_size);
   case nir_op_fadd:
      return const_value_float(0, bit_size);
   case nir_op_imul:
      return const_value_int(1, bit_size);
   case nir_op_fmul:
      return const_value_float(1, bit_size);
   case nir_op_imin:
      return const_value_int(max_int, bit_size);
   case nir_op_umin:
      return const_value_int(~0ull, bit_size);
   case nir_op_fmin:
      return const_value_float(INFINITY, bit_size);
   case nir_op_imax:
      return const_value_int(min_int, bit_size);
   case nir_op_umax:
      return const_value_int(0, bit_size);
   case nir_op_fmax:
      return const_value_float(-INFINITY, bit_size);
   case nir_op_iand:
      return const_value_int(~0ull, bit_size);
   case nir_op_ior:
      return const_value_int(0, bit_size);
   case nir_op_ixor:
      return const_value_int(0, bit_size);
   default:
      unreachable("Invalid reduction operation");
   }
}
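
/* Worked example: an identity value e for a binop OP satisfies
 * OP(x, e) == x for all x.  For a 32-bit umin reduction the identity is
 * ~0u (UINT32_MAX), since umin(x, UINT32_MAX) == x; for fmax it is
 * -INFINITY, since fmax(x, -INFINITY) == x.  A reduction lowering can
 * therefore seed its accumulator with this value without changing the
 * result.
 */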

nir_function_impl *
nir_cf_node_get_function(nir_cf_node *node)
{
   while (node->type != nir_cf_node_function) {
      node = node->parent;
   }

   return nir_cf_node_as_function(node);
}

/* Reduces a cursor by converting it to an "after" form and raising it to
 * block granularity where possible.
 */
static nir_cursor
reduce_cursor(nir_cursor cursor)
{
   switch (cursor.option) {
   case nir_cursor_before_block:
      assert(nir_cf_node_prev(&cursor.block->cf_node) == NULL ||
             nir_cf_node_prev(&cursor.block->cf_node)->type != nir_cf_node_block);
      if (exec_list_is_empty(&cursor.block->instr_list)) {
         /* Empty block.  After is as good as before. */
         cursor.option = nir_cursor_after_block;
      }
      return cursor;

   case nir_cursor_after_block:
      return cursor;

   case nir_cursor_before_instr: {
      nir_instr *prev_instr = nir_instr_prev(cursor.instr);
      if (prev_instr) {
         /* Before this instruction is after the previous */
         cursor.instr = prev_instr;
         cursor.option = nir_cursor_after_instr;
      } else {
         /* No previous instruction.  Switch to before block */
         cursor.block = cursor.instr->block;
         cursor.option = nir_cursor_before_block;
      }
      return reduce_cursor(cursor);
   }

   case nir_cursor_after_instr:
      if (nir_instr_next(cursor.instr) == NULL) {
         /* This is the last instruction, switch to after block */
         cursor.option = nir_cursor_after_block;
         cursor.block = cursor.instr->block;
      }
      return cursor;

   default:
      unreachable("Invalid cursor option");
   }
}
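
/* Example of the equivalence this establishes: "before instruction I"
 * reduces to "after the instruction preceding I" (or to a block-level
 * cursor when I is first in its block), so two differently-phrased cursors
 * that denote the same insertion point compare equal in nir_cursors_equal()
 * below.
 */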

bool
nir_cursors_equal(nir_cursor a, nir_cursor b)
{
   /* Reduced cursors should be unique */
   a = reduce_cursor(a);
   b = reduce_cursor(b);

   return a.block == b.block && a.option == b.option;
}

static bool
add_use_cb(nir_src *src, void *state)
{
   nir_instr *instr = state;

   src->parent_instr = instr;
   list_addtail(&src->use_link,
                src->is_ssa ? &src->ssa->uses : &src->reg.reg->uses);

   return true;
}

static bool
add_ssa_def_cb(nir_ssa_def *def, void *state)
{
   nir_instr *instr = state;

   if (instr->block && def->index == UINT_MAX) {
      nir_function_impl *impl =
         nir_cf_node_get_function(&instr->block->cf_node);

      def->index = impl->ssa_alloc++;
   }

   return true;
}

static bool
add_reg_def_cb(nir_dest *dest, void *state)
{
   nir_instr *instr = state;

   if (!dest->is_ssa) {
      dest->reg.parent_instr = instr;
      list_addtail(&dest->reg.def_link, &dest->reg.reg->defs);
   }

   return true;
}

static void
add_defs_uses(nir_instr *instr)
{
   nir_foreach_src(instr, add_use_cb, instr);
   nir_foreach_dest(instr, add_reg_def_cb, instr);
   nir_foreach_ssa_def(instr, add_ssa_def_cb, instr);
}

void
nir_instr_insert(nir_cursor cursor, nir_instr *instr)
{
   switch (cursor.option) {
   case nir_cursor_before_block:
      /* Only allow inserting jumps into empty blocks. */
      if (instr->type == nir_instr_type_jump)
         assert(exec_list_is_empty(&cursor.block->instr_list));

      instr->block = cursor.block;
      add_defs_uses(instr);
      exec_list_push_head(&cursor.block->instr_list, &instr->node);
      break;
   case nir_cursor_after_block: {
      /* Inserting instructions after a jump is illegal. */
      nir_instr *last = nir_block_last_instr(cursor.block);
      assert(last == NULL || last->type != nir_instr_type_jump);
      (void) last;

      instr->block = cursor.block;
      add_defs_uses(instr);
      exec_list_push_tail(&cursor.block->instr_list, &instr->node);
      break;
   }
   case nir_cursor_before_instr:
      assert(instr->type != nir_instr_type_jump);
      instr->block = cursor.instr->block;
      add_defs_uses(instr);
      exec_node_insert_node_before(&cursor.instr->node, &instr->node);
      break;
   case nir_cursor_after_instr:
      /* Inserting instructions after a jump is illegal. */
      assert(cursor.instr->type != nir_instr_type_jump);

      /* Only allow inserting jumps at the end of the block. */
      if (instr->type == nir_instr_type_jump)
         assert(cursor.instr == nir_block_last_instr(cursor.instr->block));

      instr->block = cursor.instr->block;
      add_defs_uses(instr);
      exec_node_insert_after(&cursor.instr->node, &instr->node);
      break;
   }

   if (instr->type == nir_instr_type_jump)
      nir_handle_add_jump(instr->block);
}
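
/* A minimal usage sketch ("shader" and "some_instr" are hypothetical):
 * build a cursor with one of the nir_before_instr()/nir_after_instr()/
 * nir_before_block()/nir_after_block() helpers from nir.h, then insert a
 * freshly created instruction there:
 *
 *    nir_ssa_undef_instr *undef =
 *       nir_ssa_undef_instr_create(shader, 1, 32);
 *    nir_instr_insert(nir_before_instr(some_instr), &undef->instr);
 */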

static bool
src_is_valid(const nir_src *src)
{
   return src->is_ssa ? (src->ssa != NULL) : (src->reg.reg != NULL);
}

static bool
remove_use_cb(nir_src *src, void *state)
{
   (void) state;

   if (src_is_valid(src))
      list_del(&src->use_link);

   return true;
}

static bool
remove_def_cb(nir_dest *dest, void *state)
{
   (void) state;

   if (!dest->is_ssa)
      list_del(&dest->reg.def_link);

   return true;
}

static void
remove_defs_uses(nir_instr *instr)
{
   nir_foreach_dest(instr, remove_def_cb, instr);
   nir_foreach_src(instr, remove_use_cb, instr);
}

void nir_instr_remove_v(nir_instr *instr)
{
   remove_defs_uses(instr);
   exec_node_remove(&instr->node);

   if (instr->type == nir_instr_type_jump) {
      nir_jump_instr *jump_instr = nir_instr_as_jump(instr);
      nir_handle_remove_jump(instr->block, jump_instr->type);
   }
}

/*@}*/

void
nir_index_local_regs(nir_function_impl *impl)
{
   unsigned index = 0;
   foreach_list_typed(nir_register, reg, node, &impl->registers) {
      reg->index = index++;
   }
   impl->reg_alloc = index;
}

void
nir_index_global_regs(nir_shader *shader)
{
   unsigned index = 0;
   foreach_list_typed(nir_register, reg, node, &shader->registers) {
      reg->index = index++;
   }
   shader->reg_alloc = index;
}

static bool
visit_alu_dest(nir_alu_instr *instr, nir_foreach_dest_cb cb, void *state)
{
   return cb(&instr->dest.dest, state);
}

static bool
visit_deref_dest(nir_deref_instr *instr, nir_foreach_dest_cb cb, void *state)
{
   return cb(&instr->dest, state);
}

static bool
visit_intrinsic_dest(nir_intrinsic_instr *instr, nir_foreach_dest_cb cb,
                     void *state)
{
   if (nir_intrinsic_infos[instr->intrinsic].has_dest)
      return cb(&instr->dest, state);

   return true;
}

static bool
visit_texture_dest(nir_tex_instr *instr, nir_foreach_dest_cb cb,
                   void *state)
{
   return cb(&instr->dest, state);
}

static bool
visit_phi_dest(nir_phi_instr *instr, nir_foreach_dest_cb cb, void *state)
{
   return cb(&instr->dest, state);
}

static bool
visit_parallel_copy_dest(nir_parallel_copy_instr *instr,
                         nir_foreach_dest_cb cb, void *state)
{
   nir_foreach_parallel_copy_entry(entry, instr) {
      if (!cb(&entry->dest, state))
         return false;
   }

   return true;
}

bool
nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return visit_alu_dest(nir_instr_as_alu(instr), cb, state);
   case nir_instr_type_deref:
      return visit_deref_dest(nir_instr_as_deref(instr), cb, state);
   case nir_instr_type_intrinsic:
      return visit_intrinsic_dest(nir_instr_as_intrinsic(instr), cb, state);
   case nir_instr_type_tex:
      return visit_texture_dest(nir_instr_as_tex(instr), cb, state);
   case nir_instr_type_phi:
      return visit_phi_dest(nir_instr_as_phi(instr), cb, state);
   case nir_instr_type_parallel_copy:
      return visit_parallel_copy_dest(nir_instr_as_parallel_copy(instr),
                                      cb, state);

   case nir_instr_type_load_const:
   case nir_instr_type_ssa_undef:
   case nir_instr_type_call:
   case nir_instr_type_jump:
      break;

   default:
      unreachable("Invalid instruction type");
      break;
   }

   return true;
}

struct foreach_ssa_def_state {
   nir_foreach_ssa_def_cb cb;
   void *client_state;
};

static inline bool
nir_ssa_def_visitor(nir_dest *dest, void *void_state)
{
   struct foreach_ssa_def_state *state = void_state;

   if (dest->is_ssa)
      return state->cb(&dest->ssa, state->client_state);
   else
      return true;
}

bool
nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state)
{
   switch (instr->type) {
   case nir_instr_type_alu:
   case nir_instr_type_deref:
   case nir_instr_type_tex:
   case nir_instr_type_intrinsic:
   case nir_instr_type_phi:
   case nir_instr_type_parallel_copy: {
      struct foreach_ssa_def_state foreach_state = {cb, state};
      return nir_foreach_dest(instr, nir_ssa_def_visitor, &foreach_state);
   }

   case nir_instr_type_load_const:
      return cb(&nir_instr_as_load_const(instr)->def, state);
   case nir_instr_type_ssa_undef:
      return cb(&nir_instr_as_ssa_undef(instr)->def, state);
   case nir_instr_type_call:
   case nir_instr_type_jump:
      return true;
   default:
      unreachable("Invalid instruction type");
   }
}

static bool
visit_src(nir_src *src, nir_foreach_src_cb cb, void *state)
{
   if (!cb(src, state))
      return false;
   if (!src->is_ssa && src->reg.indirect)
      return cb(src->reg.indirect, state);
   return true;
}

static bool
visit_alu_src(nir_alu_instr *instr, nir_foreach_src_cb cb, void *state)
{
   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
      if (!visit_src(&instr->src[i].src, cb, state))
         return false;

   return true;
}

static bool
visit_deref_instr_src(nir_deref_instr *instr,
                      nir_foreach_src_cb cb, void *state)
{
   if (instr->deref_type != nir_deref_type_var) {
      if (!visit_src(&instr->parent, cb, state))
         return false;
   }

   if (instr->deref_type == nir_deref_type_array) {
      if (!visit_src(&instr->arr.index, cb, state))
         return false;
   }

   return true;
}

static bool
visit_tex_src(nir_tex_instr *instr, nir_foreach_src_cb cb, void *state)
{
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      if (!visit_src(&instr->src[i].src, cb, state))
         return false;
   }

   return true;
}

static bool
visit_intrinsic_src(nir_intrinsic_instr *instr, nir_foreach_src_cb cb,
                    void *state)
{
   unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
   for (unsigned i = 0; i < num_srcs; i++) {
      if (!visit_src(&instr->src[i], cb, state))
         return false;
   }

   return true;
}

static bool
visit_call_src(nir_call_instr *instr, nir_foreach_src_cb cb, void *state)
{
   for (unsigned i = 0; i < instr->num_params; i++) {
      if (!visit_src(&instr->params[i], cb, state))
         return false;
   }

   return true;
}

static bool
visit_phi_src(nir_phi_instr *instr, nir_foreach_src_cb cb, void *state)
{
   nir_foreach_phi_src(src, instr) {
      if (!visit_src(&src->src, cb, state))
         return false;
   }

   return true;
}

static bool
visit_parallel_copy_src(nir_parallel_copy_instr *instr,
                        nir_foreach_src_cb cb, void *state)
{
   nir_foreach_parallel_copy_entry(entry, instr) {
      if (!visit_src(&entry->src, cb, state))
         return false;
   }

   return true;
}

typedef struct {
   void *state;
   nir_foreach_src_cb cb;
} visit_dest_indirect_state;

static bool
visit_dest_indirect(nir_dest *dest, void *_state)
{
   visit_dest_indirect_state *state = (visit_dest_indirect_state *) _state;

   if (!dest->is_ssa && dest->reg.indirect)
      return state->cb(dest->reg.indirect, state->state);

   return true;
}

bool
nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      if (!visit_alu_src(nir_instr_as_alu(instr), cb, state))
         return false;
      break;
   case nir_instr_type_deref:
      if (!visit_deref_instr_src(nir_instr_as_deref(instr), cb, state))
         return false;
      break;
   case nir_instr_type_intrinsic:
      if (!visit_intrinsic_src(nir_instr_as_intrinsic(instr), cb, state))
         return false;
      break;
   case nir_instr_type_tex:
      if (!visit_tex_src(nir_instr_as_tex(instr), cb, state))
         return false;
      break;
   case nir_instr_type_call:
      if (!visit_call_src(nir_instr_as_call(instr), cb, state))
         return false;
      break;
   case nir_instr_type_load_const:
      /* Constant load instructions have no regular sources */
      break;
   case nir_instr_type_phi:
      if (!visit_phi_src(nir_instr_as_phi(instr), cb, state))
         return false;
      break;
   case nir_instr_type_parallel_copy:
      if (!visit_parallel_copy_src(nir_instr_as_parallel_copy(instr),
                                   cb, state))
         return false;
      break;
   case nir_instr_type_jump:
   case nir_instr_type_ssa_undef:
      return true;

   default:
      unreachable("Invalid instruction type");
      break;
   }

   visit_dest_indirect_state dest_state;
   dest_state.state = state;
   dest_state.cb = cb;
   return nir_foreach_dest(instr, visit_dest_indirect, &dest_state);
}

int64_t
nir_src_comp_as_int(nir_src src, unsigned comp)
{
   assert(nir_src_is_const(src));
   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   assert(comp < load->def.num_components);
   switch (load->def.bit_size) {
   case 8:  return load->value.i8[comp];
   case 16: return load->value.i16[comp];
   case 32: return load->value.i32[comp];
   case 64: return load->value.i64[comp];
   default:
      unreachable("Invalid bit size");
   }
}

uint64_t
nir_src_comp_as_uint(nir_src src, unsigned comp)
{
   assert(nir_src_is_const(src));
   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   assert(comp < load->def.num_components);
   switch (load->def.bit_size) {
   case 8:  return load->value.u8[comp];
   case 16: return load->value.u16[comp];
   case 32: return load->value.u32[comp];
   case 64: return load->value.u64[comp];
   default:
      unreachable("Invalid bit size");
   }
}

bool
nir_src_comp_as_bool(nir_src src, unsigned comp)
{
   assert(nir_src_is_const(src));
   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   assert(comp < load->def.num_components);
   assert(load->def.bit_size == 32);
   assert(load->value.u32[comp] == NIR_TRUE ||
          load->value.u32[comp] == NIR_FALSE);

   return load->value.u32[comp];
}

double
nir_src_comp_as_float(nir_src src, unsigned comp)
{
   assert(nir_src_is_const(src));
   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   assert(comp < load->def.num_components);
   switch (load->def.bit_size) {
   case 16: return _mesa_half_to_float(load->value.u16[comp]);
   case 32: return load->value.f32[comp];
   case 64: return load->value.f64[comp];
   default:
      unreachable("Invalid bit size");
   }
}

int64_t
nir_src_as_int(nir_src src)
{
   assert(nir_src_num_components(src) == 1);
   return nir_src_comp_as_int(src, 0);
}

uint64_t
nir_src_as_uint(nir_src src)
{
   assert(nir_src_num_components(src) == 1);
   return nir_src_comp_as_uint(src, 0);
}

bool
nir_src_as_bool(nir_src src)
{
   assert(nir_src_num_components(src) == 1);
   return nir_src_comp_as_bool(src, 0);
}

double
nir_src_as_float(nir_src src)
{
   assert(nir_src_num_components(src) == 1);
   return nir_src_comp_as_float(src, 0);
}

nir_const_value *
nir_src_as_const_value(nir_src src)
{
   if (!src.is_ssa)
      return NULL;

   if (src.ssa->parent_instr->type != nir_instr_type_load_const)
      return NULL;

   nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);

   return &load->value;
}

/**
 * Returns true if the source is known to be dynamically uniform.  Otherwise
 * it returns false, which means the source may or may not be dynamically
 * uniform; it simply cannot be determined here.
 */
bool
nir_src_is_dynamically_uniform(nir_src src)
{
   if (!src.is_ssa)
      return false;

   /* Constants are trivially dynamically uniform */
   if (src.ssa->parent_instr->type == nir_instr_type_load_const)
      return true;

   /* As are uniform variables */
   if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) {
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr);

      if (intr->intrinsic == nir_intrinsic_load_uniform)
         return true;
   }

   /* XXX: this could have many more tests, such as when a sampler function is
    * called with dynamically uniform arguments.
    */
   return false;
}

static void
src_remove_all_uses(nir_src *src)
{
   for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
      if (!src_is_valid(src))
         continue;

      list_del(&src->use_link);
   }
}

static void
src_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
{
   for (; src; src = src->is_ssa ? NULL : src->reg.indirect) {
      if (!src_is_valid(src))
         continue;

      if (parent_instr) {
         src->parent_instr = parent_instr;
         if (src->is_ssa)
            list_addtail(&src->use_link, &src->ssa->uses);
         else
            list_addtail(&src->use_link, &src->reg.reg->uses);
      } else {
         assert(parent_if);
         src->parent_if = parent_if;
         if (src->is_ssa)
            list_addtail(&src->use_link, &src->ssa->if_uses);
         else
            list_addtail(&src->use_link, &src->reg.reg->if_uses);
      }
   }
}

void
nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src)
{
   assert(!src_is_valid(src) || src->parent_instr == instr);

   src_remove_all_uses(src);
   *src = new_src;
   src_add_all_uses(src, instr, NULL);
}

void
nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src)
{
   assert(!src_is_valid(dest) || dest->parent_instr == dest_instr);

   src_remove_all_uses(dest);
   src_remove_all_uses(src);
   *dest = *src;
   *src = NIR_SRC_INIT;
   src_add_all_uses(dest, dest_instr, NULL);
}

void
nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src)
{
   nir_src *src = &if_stmt->condition;
   assert(!src_is_valid(src) || src->parent_if == if_stmt);

   src_remove_all_uses(src);
   *src = new_src;
   src_add_all_uses(src, NULL, if_stmt);
}

void
nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest)
{
   if (dest->is_ssa) {
      /* We can only overwrite an SSA destination if it has no uses. */
      assert(list_empty(&dest->ssa.uses) && list_empty(&dest->ssa.if_uses));
   } else {
      list_del(&dest->reg.def_link);
      if (dest->reg.indirect)
         src_remove_all_uses(dest->reg.indirect);
   }

   /* We can't re-write with an SSA def */
   assert(!new_dest.is_ssa);

   nir_dest_copy(dest, &new_dest, instr);

   dest->reg.parent_instr = instr;
   list_addtail(&dest->reg.def_link, &new_dest.reg.reg->defs);

   if (dest->reg.indirect)
      src_add_all_uses(dest->reg.indirect, instr, NULL);
}

/* note: does *not* take ownership of 'name' */
void
nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
                 unsigned num_components,
                 unsigned bit_size, const char *name)
{
   def->name = ralloc_strdup(instr, name);
   def->parent_instr = instr;
   list_inithead(&def->uses);
   list_inithead(&def->if_uses);
   def->num_components = num_components;
   def->bit_size = bit_size;

   if (instr->block) {
      nir_function_impl *impl =
         nir_cf_node_get_function(&instr->block->cf_node);

      def->index = impl->ssa_alloc++;
   } else {
      def->index = UINT_MAX;
   }
}

/* note: does *not* take ownership of 'name' */
void
nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
                  unsigned num_components, unsigned bit_size,
                  const char *name)
{
   dest->is_ssa = true;
   nir_ssa_def_init(instr, &dest->ssa, num_components, bit_size, name);
}

void
nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src)
{
   assert(!new_src.is_ssa || def != new_src.ssa);

   nir_foreach_use_safe(use_src, def)
      nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);

   nir_foreach_if_use_safe(use_src, def)
      nir_if_rewrite_condition(use_src->parent_if, new_src);
}

static bool
is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between)
{
   assert(start->block == end->block);

   if (between->block != start->block)
      return false;

   /* Search backwards looking for "between" */
   while (start != end) {
      if (between == end)
         return true;

      end = nir_instr_prev(end);
      assert(end);
   }

   return false;
}

/* Replaces all uses of the given SSA def with the given source but only if
 * the use comes after the after_me instruction.  This can be useful if you
 * are emitting code to fix up the result of some instruction: you can freely
 * use the result in that code and then call rewrite_uses_after and pass the
 * last fixup instruction as after_me and it will replace all of the uses you
 * want without touching the fixup code.
 *
 * This function assumes that after_me is in the same block as
 * def->parent_instr and that after_me comes after def->parent_instr.
 */
void
nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src,
                               nir_instr *after_me)
{
   assert(!new_src.is_ssa || def != new_src.ssa);

   nir_foreach_use_safe(use_src, def) {
      assert(use_src->parent_instr != def->parent_instr);
      /* Since def already dominates all of its uses, the only way a use can
       * not be dominated by after_me is if it is between def and after_me in
       * the instruction list.
       */
      if (!is_instr_between(def->parent_instr, after_me, use_src->parent_instr))
         nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src);
   }

   nir_foreach_if_use_safe(use_src, def)
      nir_if_rewrite_condition(use_src->parent_if, new_src);
}
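
/* A minimal sketch of the fixup pattern described above ("result" and
 * "fixed" are hypothetical SSA defs; "last_fixup_instr" is the last
 * instruction of the fixup sequence, which itself reads "result"):
 *
 *    nir_ssa_def_rewrite_uses_after(result, nir_src_for_ssa(fixed),
 *                                   last_fixup_instr);
 *
 * Every use of "result" after the fixup code now reads "fixed", while the
 * fixup code's own uses of "result" are left alone.
 */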

nir_component_mask_t
nir_ssa_def_components_read(const nir_ssa_def *def)
{
   nir_component_mask_t read_mask = 0;
   nir_foreach_use(use, def) {
      if (use->parent_instr->type == nir_instr_type_alu) {
         nir_alu_instr *alu = nir_instr_as_alu(use->parent_instr);
         nir_alu_src *alu_src = exec_node_data(nir_alu_src, use, src);
         int src_idx = alu_src - &alu->src[0];
         assert(src_idx >= 0 && src_idx < nir_op_infos[alu->op].num_inputs);
         read_mask |= nir_alu_instr_src_read_mask(alu, src_idx);
      } else {
         return (1 << def->num_components) - 1;
      }
   }

   if (!list_empty(&def->if_uses))
      read_mask |= 1;

   return read_mask;
}
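
/* Example: for a vec4 def whose only use is an ALU source swizzled .xxz,
 * the returned mask is 0x5 (components x and z).  Any non-ALU use is
 * conservatively assumed to read everything, giving 0xf for a vec4.
 */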

nir_block *
nir_block_cf_tree_next(nir_block *block)
{
   if (block == NULL) {
      /* nir_foreach_block_safe() will call this function on a NULL block
       * after the last iteration, but it won't use the result so just return
       * NULL here.
       */
      return NULL;
   }

   nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node);
   if (cf_next)
      return nir_cf_node_cf_tree_first(cf_next);

   nir_cf_node *parent = block->cf_node.parent;

   switch (parent->type) {
   case nir_cf_node_if: {
      /* Are we at the end of the if? Go to the beginning of the else */
      nir_if *if_stmt = nir_cf_node_as_if(parent);
      if (block == nir_if_last_then_block(if_stmt))
         return nir_if_first_else_block(if_stmt);

      assert(block == nir_if_last_else_block(if_stmt));
      /* fall through */
   }

   case nir_cf_node_loop:
      return nir_cf_node_as_block(nir_cf_node_next(parent));

   case nir_cf_node_function:
      return NULL;

   default:
      unreachable("unknown cf node type");
   }
}

nir_block *
nir_block_cf_tree_prev(nir_block *block)
{
   if (block == NULL) {
      /* do this for consistency with nir_block_cf_tree_next() */
      return NULL;
   }

   nir_cf_node *cf_prev = nir_cf_node_prev(&block->cf_node);
   if (cf_prev)
      return nir_cf_node_cf_tree_last(cf_prev);

   nir_cf_node *parent = block->cf_node.parent;

   switch (parent->type) {
   case nir_cf_node_if: {
      /* Are we at the beginning of the else? Go to the end of the if */
      nir_if *if_stmt = nir_cf_node_as_if(parent);
      if (block == nir_if_first_else_block(if_stmt))
         return nir_if_last_then_block(if_stmt);

      assert(block == nir_if_first_then_block(if_stmt));
      /* fall through */
   }

   case nir_cf_node_loop:
      return nir_cf_node_as_block(nir_cf_node_prev(parent));

   case nir_cf_node_function:
      return NULL;

   default:
      unreachable("unknown cf node type");
   }
}

nir_block *nir_cf_node_cf_tree_first(nir_cf_node *node)
{
   switch (node->type) {
   case nir_cf_node_function: {
      nir_function_impl *impl = nir_cf_node_as_function(node);
      return nir_start_block(impl);
   }

   case nir_cf_node_if: {
      nir_if *if_stmt = nir_cf_node_as_if(node);
      return nir_if_first_then_block(if_stmt);
   }

   case nir_cf_node_loop: {
      nir_loop *loop = nir_cf_node_as_loop(node);
      return nir_loop_first_block(loop);
   }

   case nir_cf_node_block: {
      return nir_cf_node_as_block(node);
   }

   default:
      unreachable("unknown node type");
   }
}

nir_block *nir_cf_node_cf_tree_last(nir_cf_node *node)
{
   switch (node->type) {
   case nir_cf_node_function: {
      nir_function_impl *impl = nir_cf_node_as_function(node);
      return nir_impl_last_block(impl);
   }

   case nir_cf_node_if: {
      nir_if *if_stmt = nir_cf_node_as_if(node);
      return nir_if_last_else_block(if_stmt);
   }

   case nir_cf_node_loop: {
      nir_loop *loop = nir_cf_node_as_loop(node);
      return nir_loop_last_block(loop);
   }

   case nir_cf_node_block: {
      return nir_cf_node_as_block(node);
   }

   default:
      unreachable("unknown node type");
   }
}

nir_block *nir_cf_node_cf_tree_next(nir_cf_node *node)
{
   if (node->type == nir_cf_node_block)
      return nir_block_cf_tree_next(nir_cf_node_as_block(node));
   else if (node->type == nir_cf_node_function)
      return NULL;
   else
      return nir_cf_node_as_block(nir_cf_node_next(node));
}

nir_if *
nir_block_get_following_if(nir_block *block)
{
   if (exec_node_is_tail_sentinel(&block->cf_node.node))
      return NULL;

   if (nir_cf_node_is_last(&block->cf_node))
      return NULL;

   nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);

   if (next_node->type != nir_cf_node_if)
      return NULL;

   return nir_cf_node_as_if(next_node);
}

nir_loop *
nir_block_get_following_loop(nir_block *block)
{
   if (exec_node_is_tail_sentinel(&block->cf_node.node))
      return NULL;

   if (nir_cf_node_is_last(&block->cf_node))
      return NULL;

   nir_cf_node *next_node = nir_cf_node_next(&block->cf_node);

   if (next_node->type != nir_cf_node_loop)
      return NULL;

   return nir_cf_node_as_loop(next_node);
}

void
nir_index_blocks(nir_function_impl *impl)
{
   unsigned index = 0;

   if (impl->valid_metadata & nir_metadata_block_index)
      return;

   nir_foreach_block(block, impl) {
      block->index = index++;
   }

   /* The end_block isn't really part of the program, which is why its index
    * is >= num_blocks.
    */
   impl->num_blocks = impl->end_block->index = index;
}

static bool
index_ssa_def_cb(nir_ssa_def *def, void *state)
{
   unsigned *index = (unsigned *) state;
   def->index = (*index)++;

   return true;
}

/**
 * The indices are applied top-to-bottom which has the very nice property
 * that, if A dominates B, then A->index <= B->index.
 */
void
nir_index_ssa_defs(nir_function_impl *impl)
{
   unsigned index = 0;

   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block)
         nir_foreach_ssa_def(instr, index_ssa_def_cb, &index);
   }

   impl->ssa_alloc = index;
}

/**
 * The indices are applied top-to-bottom which has the very nice property
 * that, if A dominates B, then A->index <= B->index.
 */
unsigned
nir_index_instrs(nir_function_impl *impl)
{
   unsigned index = 0;

   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block)
         instr->index = index++;
   }

   return index;
}

nir_intrinsic_op
nir_intrinsic_from_system_value(gl_system_value val)
{
   switch (val) {
   case SYSTEM_VALUE_VERTEX_ID:
      return nir_intrinsic_load_vertex_id;
   case SYSTEM_VALUE_INSTANCE_ID:
      return nir_intrinsic_load_instance_id;
   case SYSTEM_VALUE_DRAW_ID:
      return nir_intrinsic_load_draw_id;
   case SYSTEM_VALUE_BASE_INSTANCE:
      return nir_intrinsic_load_base_instance;
   case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
      return nir_intrinsic_load_vertex_id_zero_base;
   case SYSTEM_VALUE_IS_INDEXED_DRAW:
      return nir_intrinsic_load_is_indexed_draw;
   case SYSTEM_VALUE_FIRST_VERTEX:
      return nir_intrinsic_load_first_vertex;
   case SYSTEM_VALUE_BASE_VERTEX:
      return nir_intrinsic_load_base_vertex;
   case SYSTEM_VALUE_INVOCATION_ID:
      return nir_intrinsic_load_invocation_id;
   case SYSTEM_VALUE_FRAG_COORD:
      return nir_intrinsic_load_frag_coord;
   case SYSTEM_VALUE_FRONT_FACE:
      return nir_intrinsic_load_front_face;
   case SYSTEM_VALUE_SAMPLE_ID:
      return nir_intrinsic_load_sample_id;
   case SYSTEM_VALUE_SAMPLE_POS:
      return nir_intrinsic_load_sample_pos;
   case SYSTEM_VALUE_SAMPLE_MASK_IN:
      return nir_intrinsic_load_sample_mask_in;
   case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
      return nir_intrinsic_load_local_invocation_id;
   case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
      return nir_intrinsic_load_local_invocation_index;
   case SYSTEM_VALUE_WORK_GROUP_ID:
      return nir_intrinsic_load_work_group_id;
   case SYSTEM_VALUE_NUM_WORK_GROUPS:
      return nir_intrinsic_load_num_work_groups;
   case SYSTEM_VALUE_PRIMITIVE_ID:
      return nir_intrinsic_load_primitive_id;
   case SYSTEM_VALUE_TESS_COORD:
      return nir_intrinsic_load_tess_coord;
   case SYSTEM_VALUE_TESS_LEVEL_OUTER:
      return nir_intrinsic_load_tess_level_outer;
   case SYSTEM_VALUE_TESS_LEVEL_INNER:
      return nir_intrinsic_load_tess_level_inner;
   case SYSTEM_VALUE_VERTICES_IN:
      return nir_intrinsic_load_patch_vertices_in;
   case SYSTEM_VALUE_HELPER_INVOCATION:
      return nir_intrinsic_load_helper_invocation;
   case SYSTEM_VALUE_VIEW_INDEX:
      return nir_intrinsic_load_view_index;
   case SYSTEM_VALUE_SUBGROUP_SIZE:
      return nir_intrinsic_load_subgroup_size;
   case SYSTEM_VALUE_SUBGROUP_INVOCATION:
      return nir_intrinsic_load_subgroup_invocation;
   case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
      return nir_intrinsic_load_subgroup_eq_mask;
   case SYSTEM_VALUE_SUBGROUP_GE_MASK:
      return nir_intrinsic_load_subgroup_ge_mask;
   case SYSTEM_VALUE_SUBGROUP_GT_MASK:
      return nir_intrinsic_load_subgroup_gt_mask;
   case SYSTEM_VALUE_SUBGROUP_LE_MASK:
      return nir_intrinsic_load_subgroup_le_mask;
   case SYSTEM_VALUE_SUBGROUP_LT_MASK:
      return nir_intrinsic_load_subgroup_lt_mask;
   case SYSTEM_VALUE_NUM_SUBGROUPS:
      return nir_intrinsic_load_num_subgroups;
   case SYSTEM_VALUE_SUBGROUP_ID:
      return nir_intrinsic_load_subgroup_id;
   case SYSTEM_VALUE_LOCAL_GROUP_SIZE:
      return nir_intrinsic_load_local_group_size;
   case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
      return nir_intrinsic_load_global_invocation_id;
   case SYSTEM_VALUE_WORK_DIM:
      return nir_intrinsic_load_work_dim;
   default:
      unreachable("system value does not directly correspond to intrinsic");
   }
}

gl_system_value
nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
{
   switch (intrin) {
   case nir_intrinsic_load_vertex_id:
      return SYSTEM_VALUE_VERTEX_ID;
   case nir_intrinsic_load_instance_id:
      return SYSTEM_VALUE_INSTANCE_ID;
   case nir_intrinsic_load_draw_id:
      return SYSTEM_VALUE_DRAW_ID;
   case nir_intrinsic_load_base_instance:
      return SYSTEM_VALUE_BASE_INSTANCE;
   case nir_intrinsic_load_vertex_id_zero_base:
      return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE;
   case nir_intrinsic_load_first_vertex:
      return SYSTEM_VALUE_FIRST_VERTEX;
   case nir_intrinsic_load_is_indexed_draw:
      return SYSTEM_VALUE_IS_INDEXED_DRAW;
   case nir_intrinsic_load_base_vertex:
      return SYSTEM_VALUE_BASE_VERTEX;
   case nir_intrinsic_load_invocation_id:
      return SYSTEM_VALUE_INVOCATION_ID;
   case nir_intrinsic_load_frag_coord:
      return SYSTEM_VALUE_FRAG_COORD;
   case nir_intrinsic_load_front_face:
      return SYSTEM_VALUE_FRONT_FACE;
   case nir_intrinsic_load_sample_id:
      return SYSTEM_VALUE_SAMPLE_ID;
   case nir_intrinsic_load_sample_pos:
      return SYSTEM_VALUE_SAMPLE_POS;
   case nir_intrinsic_load_sample_mask_in:
      return SYSTEM_VALUE_SAMPLE_MASK_IN;
   case nir_intrinsic_load_local_invocation_id:
      return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
   case nir_intrinsic_load_local_invocation_index:
      return SYSTEM_VALUE_LOCAL_INVOCATION_INDEX;
   case nir_intrinsic_load_num_work_groups:
      return SYSTEM_VALUE_NUM_WORK_GROUPS;
   case nir_intrinsic_load_work_group_id:
      return SYSTEM_VALUE_WORK_GROUP_ID;
   case nir_intrinsic_load_primitive_id:
      return SYSTEM_VALUE_PRIMITIVE_ID;
   case nir_intrinsic_load_tess_coord:
      return SYSTEM_VALUE_TESS_COORD;
   case nir_intrinsic_load_tess_level_outer:
      return SYSTEM_VALUE_TESS_LEVEL_OUTER;
   case nir_intrinsic_load_tess_level_inner:
      return SYSTEM_VALUE_TESS_LEVEL_INNER;
   case nir_intrinsic_load_patch_vertices_in:
      return SYSTEM_VALUE_VERTICES_IN;
   case nir_intrinsic_load_helper_invocation:
      return SYSTEM_VALUE_HELPER_INVOCATION;
   case nir_intrinsic_load_view_index:
      return SYSTEM_VALUE_VIEW_INDEX;
   case nir_intrinsic_load_subgroup_size:
      return SYSTEM_VALUE_SUBGROUP_SIZE;
   case nir_intrinsic_load_subgroup_invocation:
      return SYSTEM_VALUE_SUBGROUP_INVOCATION;
   case nir_intrinsic_load_subgroup_eq_mask:
      return SYSTEM_VALUE_SUBGROUP_EQ_MASK;
   case nir_intrinsic_load_subgroup_ge_mask:
      return SYSTEM_VALUE_SUBGROUP_GE_MASK;
   case nir_intrinsic_load_subgroup_gt_mask:
      return SYSTEM_VALUE_SUBGROUP_GT_MASK;
   case nir_intrinsic_load_subgroup_le_mask:
      return SYSTEM_VALUE_SUBGROUP_LE_MASK;
   case nir_intrinsic_load_subgroup_lt_mask:
      return SYSTEM_VALUE_SUBGROUP_LT_MASK;
   case nir_intrinsic_load_num_subgroups:
      return SYSTEM_VALUE_NUM_SUBGROUPS;
   case nir_intrinsic_load_subgroup_id:
      return SYSTEM_VALUE_SUBGROUP_ID;
   case nir_intrinsic_load_local_group_size:
      return SYSTEM_VALUE_LOCAL_GROUP_SIZE;
   case nir_intrinsic_load_global_invocation_id:
      return SYSTEM_VALUE_GLOBAL_INVOCATION_ID;
   default:
      unreachable("intrinsic doesn't produce a system value");
   }
}

/* OpenGL utility method that remaps the location attributes if they are
 * doubles.  Not needed for Vulkan due to the differences in the input
 * location counts for doubles on Vulkan vs OpenGL.
 *
 * The bitfield returned in dual_slot is one bit for each double input slot in
 * the original OpenGL single-slot input numbering.  The mapping from old
 * locations to new locations is as follows:
 *
 *    new_loc = loc + util_bitcount(dual_slot & BITFIELD64_MASK(loc))
 */
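
/* Worked example (hypothetical layout): with inputs at locations 0, 1, and
 * 2 where the variable at location 1 is a dvec4 (two slots), dual_slot
 * becomes 0x6 (BITFIELD64_MASK(2) << 1).  Location 0 stays at 0, location 1
 * stays at 1, and location 2 moves to 2 + util_bitcount(0x6 & 0x3) == 3,
 * leaving room for the dvec4's second slot.
 */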
void
nir_remap_dual_slot_attributes(nir_shader *shader, uint64_t *dual_slot)
{
   assert(shader->info.stage == MESA_SHADER_VERTEX);

   *dual_slot = 0;
   nir_foreach_variable(var, &shader->inputs) {
      if (glsl_type_is_dual_slot(glsl_without_array(var->type))) {
         unsigned slots = glsl_count_attribute_slots(var->type, true);
         *dual_slot |= BITFIELD64_MASK(slots) << var->data.location;
      }
   }

   nir_foreach_variable(var, &shader->inputs) {
      var->data.location +=
         util_bitcount64(*dual_slot & BITFIELD64_MASK(var->data.location));
   }
}

/* Returns an attribute mask that has been re-compacted using the given
 * dual_slot mask.
 */
uint64_t
nir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot)
{
   while (dual_slot) {
      unsigned loc = u_bit_scan64(&dual_slot);
      /* mask of all bits up to and including loc */
      uint64_t mask = BITFIELD64_MASK(loc + 1);
      attribs = (attribs & mask) | ((attribs & ~mask) >> 1);
   }
   return attribs;
}
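
/* Worked example: with dual_slot == 0x2 (a dual-slot attribute at
 * location 1) and attribs == 0b1101 (locations 0, 2, and 3 set), the single
 * iteration keeps bits 0-1 and shifts the higher bits down by one, yielding
 * 0b0111 (locations 0, 1, and 2).
 */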