2 * Copyright © 2015 Thomas Helland
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 #include "nir_constant_expressions.h"
26 #include "nir_loop_analyze.h"
33 } nir_loop_variable_type
;
/* Record describing a detected basic induction variable: the ALU add that
 * steps it each iteration, and the phi source that supplies its initial
 * value from outside the loop.
 */
typedef struct nir_basic_induction_var {
   nir_alu_instr *alu;            /* The def of the alu-operation */
   nir_ssa_def *def_outside_loop; /* The phi-src outside the loop */
} nir_basic_induction_var;
/* NOTE(review): this region is a lossy extraction of two struct typedefs —
 * the per-SSA-def analysis record (upstream "nir_loop_variable") and the
 * analysis context ("loop_info_state"). Their typedef openers/closers and
 * several field lines were dropped; verify against the original file.
 */

   /* A link for the work list */
   struct list_head process_link;

   /* The ssa_def associated with this info */
   /* [extraction gap: the def-pointer field line is missing here] */

   /* The type of this ssa_def */
   nir_loop_variable_type type;

   /* If this is of type basic_induction */
   struct nir_basic_induction_var *ind;

   /* True if variable is in an if branch */
   /* [extraction gap: field line missing] */

   /* True if variable is in a nested loop */
   /* [extraction gap: field line missing] */

   /* The loop we store information for */
   /* [extraction gap: field line missing] */

   /* Loop_variable for all ssa_defs in function */
   nir_loop_variable *loop_vars;

   /* A list of the loop_vars to analyze */
   struct list_head process_list;

   nir_variable_mode indirect_mask;
77 static nir_loop_variable
*
78 get_loop_var(nir_ssa_def
*value
, loop_info_state
*state
)
80 return &(state
->loop_vars
[value
->index
]);
   /* Back-pointer to the analysis context, consumed by init_loop_def().
    * [extraction gap: this is the trailing field of a small struct
    * (upstream "init_loop_state") whose surrounding lines were lost]
    */
   loop_info_state *state;
/* Per-def callback run while initializing a loop block: records whether the
 * def lives in an if branch or a nested loop, and queues top-level defs on
 * the work list. [extraction gap: return type line missing]
 */
init_loop_def(nir_ssa_def *def, void *void_init_loop_state)
{
   init_loop_state *loop_init_state = void_init_loop_state;
   nir_loop_variable *var = get_loop_var(def, loop_init_state->state);

   if (loop_init_state->in_nested_loop) {
      var->in_nested_loop = true;
   } else if (loop_init_state->in_if_branch) {
      var->in_if_branch = true;
   }
   /* [extraction gap: an enclosing else-branch appears to be lost — the
    * comment below says only defs outside nested loops/conditionals are
    * queued, so the list_addtail is presumably conditional; verify]
    */
   /* Add to the tail of the list. That way we start at the beginning of
    * the defs in the loop instead of the end when walking the list. This
    * means less recursive calls. Only add defs that are not in nested
    * loops or conditional blocks.
    */
   list_addtail(&var->process_link, &loop_init_state->state->process_list);
   /* [extraction gap: return statement missing] */
}
/** Calculate an estimated cost in number of instructions
 *
 * We do this so that we don't unroll loops which will later get massively
 * inflated due to int64 or fp64 lowering. The estimates provided here don't
 * have to be massively accurate; they just have to be good enough that loop
 * unrolling doesn't cause things to blow up too much.
 *
 * [extraction gap: return type line missing]
 */
instr_cost(nir_instr *instr, const nir_shader_compiler_options *options)
{
   /* Intrinsics and texture ops get a flat cost. */
   if (instr->type == nir_instr_type_intrinsic ||
       instr->type == nir_instr_type_tex)
      /* [extraction gap: return value missing] */

   if (instr->type != nir_instr_type_alu)
      /* [extraction gap: return value missing] */

   nir_alu_instr *alu = nir_instr_as_alu(instr);
   const nir_op_info *info = &nir_op_infos[alu->op];

   /* Assume everything 16 or 32-bit is cheap.
    *
    * There are no 64-bit ops that don't have a 64-bit thing as their
    * destination or first source.
    */
   if (nir_dest_bit_size(alu->dest.dest) < 64 &&
       nir_src_bit_size(alu->src[0].src) < 64)
      /* [extraction gap: return value missing] */

   bool is_fp64 = nir_dest_bit_size(alu->dest.dest) == 64 &&
                  nir_alu_type_get_base_type(info->output_type) == nir_type_float;
   for (unsigned i = 0; i < info->num_inputs; i++) {
      if (nir_src_bit_size(alu->src[i].src) == 64 &&
          nir_alu_type_get_base_type(info->input_types[i]) == nir_type_float)
         /* [extraction gap: body missing — presumably sets is_fp64 = true] */
   }

   /* [extraction gap: an "if (is_fp64)" guard presumably precedes the two
    * fp64-lowering checks below — verify upstream]
    */
   /* If it's something lowered normally, it's expensive. */
   if (options->lower_doubles_options &
       nir_lower_doubles_op_to_options_mask(alu->op))
      /* [extraction gap: cost/return missing] */

   /* If it's full software, it's even more expensive */
   if (options->lower_doubles_options & nir_lower_fp64_full_software)
      /* [extraction gap: cost/return missing] */

   if (options->lower_int64_options &
       nir_lower_int64_op_to_options_mask(alu->op)) {
      /* These require doing the division algorithm. */
      if (alu->op == nir_op_idiv || alu->op == nir_op_udiv ||
          alu->op == nir_op_imod || alu->op == nir_op_umod ||
          alu->op == nir_op_irem)
         /* [extraction gap: cost/return missing] */

      /* Other int64 lowering isn't usually all that expensive */
      /* [extraction gap: cost/return missing] */
   }
   /* [extraction gap: final return missing] */
}
/* Walks every instruction in `block`, accumulating its estimated cost into
 * the loop info and initializing each SSA def through init_loop_def().
 * [extraction gap: return type line missing]
 */
init_loop_block(nir_block *block, loop_info_state *state,
                bool in_if_branch, bool in_nested_loop,
                const nir_shader_compiler_options *options)
{
   init_loop_state init_state = {.in_if_branch = in_if_branch,
                                 .in_nested_loop = in_nested_loop,
                                 /* [extraction gap: ".state = state"
                                  * initializer presumably missing] */};

   nir_foreach_instr(instr, block) {
      state->loop->info->instr_cost += instr_cost(instr, options);
      nir_foreach_ssa_def(instr, init_loop_def, &init_state);
   }
   /* [extraction gap: return statement missing] */
}
197 is_var_alu(nir_loop_variable
*var
)
199 return var
->def
->parent_instr
->type
== nir_instr_type_alu
;
203 is_var_constant(nir_loop_variable
*var
)
205 return var
->def
->parent_instr
->type
== nir_instr_type_load_const
;
209 is_var_phi(nir_loop_variable
*var
)
211 return var
->def
->parent_instr
->type
== nir_instr_type_phi
;
/* Recursively classifies `def` as loop-invariant or not, caching the result
 * in the def's analysis record. [extraction gap: return type line missing]
 */
mark_invariant(nir_ssa_def *def, loop_info_state *state)
{
   nir_loop_variable *var = get_loop_var(def, state);

   /* Already classified invariant on an earlier visit. */
   if (var->type == invariant)
      /* [extraction gap: return statement missing] */

   /* [extraction gap: the guarding condition for the assignment below was
    * lost — presumably "defined outside the loop implies invariant"]
    */
      var->type = invariant;
      /* [extraction gap: return statement missing] */

   if (var->type == not_invariant)
      /* [extraction gap: return statement missing] */

   if (is_var_alu(var)) {
      nir_alu_instr *alu = nir_instr_as_alu(def->parent_instr);

      /* An ALU result is invariant iff every source is invariant. */
      for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
         if (!mark_invariant(alu->src[i].src.ssa, state)) {
            var->type = not_invariant;
            /* [extraction gap: return statement missing] */
         }
      }
      var->type = invariant;
      /* [extraction gap: return statement missing] */
   }

   /* Phis shouldn't be invariant except if one operand is invariant, and the
    * other is the phi itself. These should be removed by opt_remove_phis.
    * load_consts are already set to invariant and constant during init,
    * and so should return earlier. Remaining op_codes are set undefined.
    */
   var->type = not_invariant;
   /* [extraction gap: return statement missing] */
}
/* Classifies every queued def as invariant/not-invariant and removes the
 * invariant ones from the work list. [extraction gap: return type missing]
 */
compute_invariance_information(loop_info_state *state)
{
   /* An expression is invariant in a loop L if:
    *  – it's a variable use, all of whose single defs are outside of L
    *  – it's a pure computation all of whose args are loop invariant
    *  – it's a variable use whose single reaching def, and the
    *    rhs of that def is loop-invariant
    */
   list_for_each_entry_safe(nir_loop_variable, var, &state->process_list,
                            /* [extraction gap: link-member argument
                             * (the process_link field) missing] */) {
      assert(!var->in_if_branch && !var->in_nested_loop);

      if (mark_invariant(var->def, state))
         list_del(&var->process_link);
   }
}
/* If all of the instruction sources point to identical ALU instructions (as
 * per nir_instrs_equal), return one of the ALU instructions. Otherwise,
 * presumably return NULL — the tail of this comment was lost in extraction.
 */
static nir_alu_instr *
phi_instr_as_alu(nir_phi_instr *phi)
{
   nir_alu_instr *first = NULL;
   nir_foreach_phi_src(src, phi) {
      assert(src->src.is_ssa);
      if (src->src.ssa->parent_instr->type != nir_instr_type_alu)
         /* [extraction gap: return statement missing] */

      nir_alu_instr *alu = nir_instr_as_alu(src->src.ssa->parent_instr);
      /* [extraction gap: the "first is NULL → remember alu" branch was
       * lost; only the comparison against an already-seen alu remains]
       */
      if (!nir_instrs_equal(&first->instr, &alu->instr))
         /* [extraction gap: return statement missing] */
   }
   /* [extraction gap: final return missing — presumably "return first;"] */
}
/* Checks that source `src_idx` of `alu` uses the identity swizzle (component
 * i reads channel i) across all destination components.
 * [extraction gap: return type line missing]
 */
alu_src_has_identity_swizzle(nir_alu_instr *alu, unsigned src_idx)
{
   /* Only meaningful for sources without a fixed size. */
   assert(nir_op_infos[alu->op].input_sizes[src_idx] == 0);
   assert(alu->dest.dest.is_ssa);
   for (unsigned i = 0; i < alu->dest.dest.ssa.num_components; i++) {
      if (alu->src[src_idx].swizzle[i] != i)
         /* [extraction gap: return statement missing] */
   }
   /* [extraction gap: final return missing] */
}
/* Scans the remaining work-list entries for basic induction variables
 * (phi of a constant initial value and a phi+constant ALU add) and tags
 * them with type basic_induction. Returns whether any were found.
 * [extraction gap: return type line missing]
 */
compute_induction_information(loop_info_state *state)
{
   bool found_induction_var = false;
   list_for_each_entry_safe(nir_loop_variable, var, &state->process_list,
                            /* [extraction gap: link-member argument
                             * missing] */) {

      /* It can't be an induction variable if it is invariant. Invariants and
       * things in nested loops or conditionals should have been removed from
       * the list by compute_invariance_information().
       */
      assert(!var->in_if_branch && !var->in_nested_loop &&
             var->type != invariant);

      /* We are only interested in checking phis for the basic induction
       * variable case as it's simple to detect.
       * [extraction gap: remainder of comment lost]
       */
      if (!is_var_phi(var))
         /* [extraction gap: statement missing — presumably continue] */

      nir_phi_instr *phi = nir_instr_as_phi(var->def->parent_instr);
      nir_basic_induction_var *biv = rzalloc(state, nir_basic_induction_var);

      nir_loop_variable *alu_src_var = NULL;
      nir_foreach_phi_src(src, phi) {
         nir_loop_variable *src_var = get_loop_var(src->src.ssa, state);

         /* If one of the sources is in an if branch or nested loop then don't
          * attempt to go any further.
          */
         if (src_var->in_if_branch || src_var->in_nested_loop)
            /* [extraction gap: statement missing — presumably break] */

         /* Detect induction variables that are incremented in both branches
          * of an unnested if rather than in a loop block.
          */
         if (is_var_phi(src_var)) {
            nir_phi_instr *src_phi =
               nir_instr_as_phi(src_var->def->parent_instr);
            nir_alu_instr *src_phi_alu = phi_instr_as_alu(src_phi);
            /* [extraction gap: NULL-check on src_phi_alu presumably lost] */
            src_var = get_loop_var(&src_phi_alu->dest.dest.ssa, state);
            if (!src_var->in_if_branch)
               /* [extraction gap: statement missing — presumably break] */
         }

         if (!src_var->in_loop && !biv->def_outside_loop) {
            /* First source defined outside the loop: initial value. */
            biv->def_outside_loop = src_var->def;
         } else if (is_var_alu(src_var) && !biv->alu) {
            alu_src_var = src_var;
            nir_alu_instr *alu = nir_instr_as_alu(src_var->def->parent_instr);

            if (nir_op_infos[alu->op].num_inputs == 2) {
               for (unsigned i = 0; i < 2; i++) {
                  /* Is one of the operands const, and the other the phi. The
                   * phi source can't be swizzled in any way.
                   */
                  if (nir_src_is_const(alu->src[i].src) &&
                      alu->src[1-i].src.ssa == &phi->dest.ssa &&
                      alu_src_has_identity_swizzle(alu, 1 - i))
                     /* [extraction gap: body missing — presumably records
                      * "biv->alu = alu;"] */
               }
            }
         }
      }

      if (biv->alu && biv->def_outside_loop &&
          biv->def_outside_loop->parent_instr->type == nir_instr_type_load_const) {
         alu_src_var->type = basic_induction;
         alu_src_var->ind = biv;
         var->type = basic_induction;
         /* [extraction gap: lines missing — presumably also links biv to
          * var before setting the flag below]
          */
         found_induction_var = true;
      }
      /* [extraction gap: else-branch presumably frees the unused biv] */
   }
   return found_induction_var;
}
/* Per-def callback run once over the whole function: marks every def as
 * outside the loop and classifies load_consts as invariant up front.
 * [extraction gap: return type line missing]
 */
initialize_ssa_def(nir_ssa_def *def, void *void_state)
{
   loop_info_state *state = void_state;
   nir_loop_variable *var = get_loop_var(def, state);

   var->in_loop = false;
   /* [extraction gap: presumably also stores def into the record here] */

   if (def->parent_instr->type == nir_instr_type_load_const) {
      var->type = invariant;
   }
   /* [extraction gap: else-branch opener lost] */
      var->type = undefined;

   /* [extraction gap: return statement missing] */
}
/* Finds the loop's simple terminators: top-level ifs whose then- or
 * else-branch ends in a break. Records each as a nir_loop_terminator.
 * Marks the loop complex (and presumably bails) when a break exists that
 * cannot be modelled. [extraction gap: return type line missing]
 */
find_loop_terminators(loop_info_state *state)
{
   bool success = false;
   foreach_list_typed_safe(nir_cf_node, node, node, &state->loop->body) {
      if (node->type == nir_cf_node_if) {
         nir_if *nif = nir_cf_node_as_if(node);

         nir_block *break_blk = NULL;
         nir_block *continue_from_blk = NULL;
         bool continue_from_then = true;

         /* Which side of the if ends in the break? */
         nir_block *last_then = nir_if_last_then_block(nif);
         nir_block *last_else = nir_if_last_else_block(nif);
         if (nir_block_ends_in_break(last_then)) {
            break_blk = last_then;
            continue_from_blk = last_else;
            continue_from_then = false;
         } else if (nir_block_ends_in_break(last_else)) {
            break_blk = last_else;
            continue_from_blk = last_then;
         }

         /* If there is a break then we should find a terminator. If we can
          * not find a loop terminator, but there is a break-statement then
          * we should return false so that we do not try to find trip-count
          * [extraction gap: tail of comment lost]
          */
         if (!nir_is_trivial_loop_if(nif, break_blk)) {
            state->loop->info->complex_loop = true;
            /* [extraction gap: return statement missing] */
         }

         /* Continue if the if contained no jumps at all */
         /* [extraction gap: the corresponding check was lost] */

         /* A phi-fed condition makes the trip count undecidable here. */
         if (nif->condition.ssa->parent_instr->type == nir_instr_type_phi) {
            state->loop->info->complex_loop = true;
            /* [extraction gap: return statement missing] */
         }

         nir_loop_terminator *terminator =
            rzalloc(state->loop->info, nir_loop_terminator);

         list_addtail(&terminator->loop_terminator_link,
                      &state->loop->info->loop_terminator_list);

         terminator->nif = nif;
         terminator->break_block = break_blk;
         terminator->continue_from_block = continue_from_blk;
         terminator->continue_from_then = continue_from_then;
         terminator->conditional_instr = nif->condition.ssa->parent_instr;

         /* [extraction gap: presumably sets success = true here] */
      }
   }
   /* [extraction gap: final return missing — presumably "return success;"] */
}
/* This function looks for an array access within a loop that uses an
 * induction variable for the array index. If found it returns the size of the
 * array, otherwise 0 is returned. If we find an induction var we pass it back
 * to the caller via array_index_out.
 * [extraction gap: return type line missing]
 */
find_array_access_via_induction(loop_info_state *state,
                                nir_deref_instr *deref,
                                nir_loop_variable **array_index_out)
{
   /* Walk up the deref chain looking for an array-typed link. */
   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
      if (d->deref_type != nir_deref_type_array)
         /* [extraction gap: statement missing — presumably continue] */

      assert(d->arr.index.is_ssa);
      nir_loop_variable *array_index = get_loop_var(d->arr.index.ssa, state);

      if (array_index->type != basic_induction)
         /* [extraction gap: statement missing — presumably continue] */

      /* [extraction gap: NULL-check on array_index_out presumably lost;
       * note callers do pass NULL]
       */
      *array_index_out = array_index;

      nir_deref_instr *parent = nir_deref_instr_parent(d);
      if (glsl_type_is_array_or_matrix(parent->type)) {
         return glsl_get_length(parent->type);
      }
      /* [extraction gap: else-branch opener lost] */
      assert(glsl_type_is_vector(parent->type));
      return glsl_get_vector_elements(parent->type);
   }
   /* [extraction gap: final return missing — presumably "return 0;"] */
}
/* Guesses a loop limit from the smallest array indexed by the given basic
 * induction variable anywhere in the loop body; writes the guess into
 * *limit_val. [extraction gap: return type line missing]
 */
guess_loop_limit(loop_info_state *state, nir_const_value *limit_val,
                 nir_ssa_scalar basic_ind)
{
   unsigned min_array_size = 0;

   nir_foreach_block_in_cf_node(block, &state->loop->cf_node) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            /* [extraction gap: statement missing — presumably continue] */

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

         /* Check for arrays variably-indexed by a loop induction variable. */
         if (intrin->intrinsic == nir_intrinsic_load_deref ||
             intrin->intrinsic == nir_intrinsic_store_deref ||
             intrin->intrinsic == nir_intrinsic_copy_deref) {

            nir_loop_variable *array_idx = NULL;
            unsigned array_size =
               find_array_access_via_induction(state,
                                               nir_src_as_deref(intrin->src[0]),
                                               /* [extraction gap: out-param
                                                * argument missing] */);
            if (array_idx && basic_ind.def == array_idx->def &&
                (min_array_size == 0 || min_array_size > array_size)) {
               /* Array indices are scalars */
               assert(basic_ind.def->num_components == 1);
               min_array_size = array_size;
            }

            /* copy_deref also has a source deref worth checking. */
            if (intrin->intrinsic != nir_intrinsic_copy_deref)
               /* [extraction gap: statement missing — presumably continue] */

            /* [extraction gap: the "array_size =" assignment target and
             * out-param for this second query were lost] */
            find_array_access_via_induction(state,
                                            nir_src_as_deref(intrin->src[1]),
                                            /* [extraction gap] */);
            if (array_idx && basic_ind.def == array_idx->def &&
                (min_array_size == 0 || min_array_size > array_size)) {
               /* Array indices are scalars */
               assert(basic_ind.def->num_components == 1);
               min_array_size = array_size;
            }
         }
      }
   }

   if (min_array_size) {
      *limit_val = nir_const_value_for_uint(min_array_size,
                                            basic_ind.def->bit_size);
      /* [extraction gap: return statement missing] */
   }

   /* [extraction gap: final return missing] */
}
/* When the limit is a min() of a constant and something else, use the
 * constant operand as an (inexact) upper bound on the trip count.
 * [extraction gap: return type line missing]
 */
try_find_limit_of_alu(nir_ssa_scalar limit, nir_const_value *limit_val,
                      nir_loop_terminator *terminator, loop_info_state *state)
{
   if (!nir_ssa_scalar_is_alu(limit))
      /* [extraction gap: return statement missing] */

   nir_op limit_op = nir_ssa_scalar_alu_op(limit);
   if (limit_op == nir_op_imin || limit_op == nir_op_fmin) {
      for (unsigned i = 0; i < 2; i++) {
         nir_ssa_scalar src = nir_ssa_scalar_chase_alu_src(limit, i);
         if (nir_ssa_scalar_is_const(src)) {
            *limit_val = nir_ssa_scalar_as_const_value(src);
            /* The other min() operand may shrink the count further. */
            terminator->exact_trip_count_unknown = true;
            /* [extraction gap: return statement missing] */
         }
      }
   }
   /* [extraction gap: final return missing] */
}
/* Constant-folds a single-input nir_op at the given bit size. */
static nir_const_value
eval_const_unop(nir_op op, unsigned bit_size, nir_const_value src0,
                unsigned execution_mode)
{
   assert(nir_op_infos[op].num_inputs == 1);
   nir_const_value dest;
   nir_const_value *src[1] = { &src0 };
   nir_eval_const_opcode(op, &dest, 1, bit_size, src, execution_mode);
   /* [extraction gap: return statement missing — presumably
    * "return dest;"] */
}
/* Constant-folds a two-input nir_op at the given bit size. */
static nir_const_value
eval_const_binop(nir_op op, unsigned bit_size,
                 nir_const_value src0, nir_const_value src1,
                 unsigned execution_mode)
{
   assert(nir_op_infos[op].num_inputs == 2);
   nir_const_value dest;
   nir_const_value *src[2] = { &src0, &src1 };
   nir_eval_const_opcode(op, &dest, 1, bit_size, src, execution_mode);
   /* [extraction gap: return statement missing — presumably
    * "return dest;"] */
}
/* Computes (limit - initial) / step as an iteration-count estimate, using
 * signed, unsigned or float arithmetic depending on the comparison opcode.
 * Returns -1 when the count exceeds INT_MAX.
 * [extraction gap: return type line missing]
 */
get_iteration(nir_op cond_op, nir_const_value initial, nir_const_value step,
              nir_const_value limit, unsigned bit_size,
              unsigned execution_mode)
{
   nir_const_value span, iter;

   /* [extraction gap: a switch on cond_op with its case labels was lost;
    * the three arms below correspond to signed-int, unsigned-int and float
    * comparisons respectively — verify upstream]
    */
      span = eval_const_binop(nir_op_isub, bit_size, limit, initial,
                              /* [extraction gap: argument missing] */);
      iter = eval_const_binop(nir_op_idiv, bit_size, span, step,
                              /* [extraction gap: argument missing] */);

      span = eval_const_binop(nir_op_isub, bit_size, limit, initial,
                              /* [extraction gap: argument missing] */);
      iter = eval_const_binop(nir_op_udiv, bit_size, span, step,
                              /* [extraction gap: argument missing] */);

      span = eval_const_binop(nir_op_fsub, bit_size, limit, initial,
                              /* [extraction gap: argument missing] */);
      iter = eval_const_binop(nir_op_fdiv, bit_size, span,
                              step, execution_mode);
      /* Truncate the float quotient to an integer count. */
      iter = eval_const_unop(nir_op_f2i64, bit_size, iter, execution_mode);

   uint64_t iter_u64 = nir_const_value_as_uint(iter, bit_size);
   return iter_u64 > INT_MAX ? -1 : (int)iter_u64;
}
/* Evaluates the loop exit condition with the initial induction value
 * (stepped once for do-while-style loops, i.e. trip_offset == 1) to detect
 * loops that break on their very first iteration.
 * [extraction gap: return type line missing]
 */
will_break_on_first_iteration(nir_const_value step,
                              nir_alu_type induction_base_type,
                              unsigned trip_offset,
                              nir_op cond_op, unsigned bit_size,
                              nir_const_value initial,
                              nir_const_value limit,
                              bool limit_rhs, bool invert_cond,
                              unsigned execution_mode)
{
   if (trip_offset == 1) {
      /* [extraction gap: add_op declaration line lost] */
      switch (induction_base_type) {
      /* [extraction gap: float case label lost] */
         add_op = nir_op_fadd;
      /* [extraction gap: int/uint case label(s) lost] */
         add_op = nir_op_iadd;
      /* [extraction gap: default label lost] */
         unreachable("Unhandled induction variable base type!");
      }

      /* Step the initial value once before testing the condition. */
      initial = eval_const_binop(add_op, bit_size, initial, step,
                                 /* [extraction gap: argument missing] */);
   }

   /* Place initial/limit on the sides of the comparison they occupy in the
    * original condition.
    */
   nir_const_value *src[2];
   src[limit_rhs ? 0 : 1] = &initial;
   src[limit_rhs ? 1 : 0] = &limit;

   /* Evaluate the loop exit condition */
   nir_const_value result;
   nir_eval_const_opcode(cond_op, &result, 1, bit_size, src, execution_mode);

   return invert_cond ? !result.b : result.b;
}
/* Checks whether the exit condition holds after `iter_int` iterations, by
 * constant-folding initial + iter_int * step against the limit.
 * [extraction gap: return type line missing]
 */
test_iterations(int32_t iter_int, nir_const_value step,
                nir_const_value limit, nir_op cond_op, unsigned bit_size,
                nir_alu_type induction_base_type,
                nir_const_value initial, bool limit_rhs, bool invert_cond,
                unsigned execution_mode)
{
   assert(nir_op_infos[cond_op].num_inputs == 2);

   nir_const_value iter_src;
   /* [extraction gap: mul_op/add_op declaration lines lost] */

   switch (induction_base_type) {
   /* [extraction gap: float case label lost] */
      iter_src = nir_const_value_for_float(iter_int, bit_size);
      mul_op = nir_op_fmul;
      add_op = nir_op_fadd;
   /* [extraction gap: int/uint case label(s) lost] */
      iter_src = nir_const_value_for_int(iter_int, bit_size);
      mul_op = nir_op_imul;
      add_op = nir_op_iadd;
   /* [extraction gap: default label lost] */
      unreachable("Unhandled induction variable base type!");
   }

   /* Multiply the iteration count we are testing by the number of times we
    * step the induction variable each iteration.
    */
   nir_const_value mul_result =
      eval_const_binop(mul_op, bit_size, iter_src, step, execution_mode);

   /* Add the initial value to the accumulated induction variable total */
   nir_const_value add_result =
      eval_const_binop(add_op, bit_size, mul_result, initial, execution_mode);

   nir_const_value *src[2];
   src[limit_rhs ? 0 : 1] = &add_result;
   src[limit_rhs ? 1 : 0] = &limit;

   /* Evaluate the loop exit condition */
   nir_const_value result;
   nir_eval_const_opcode(cond_op, &result, 1, bit_size, src, execution_mode);

   return invert_cond ? !result.b : result.b;
}
/* Given constant initial/step/limit values and the stepping ALU plus the
 * comparison, computes the trip count, verifying the estimate against the
 * exit condition with a ±1 bias sweep. Returns -1 (per callers) when the
 * count cannot be determined. [extraction gap: return type line missing]
 */
calculate_iterations(nir_const_value initial, nir_const_value step,
                     nir_const_value limit, nir_alu_instr *alu,
                     nir_ssa_scalar cond, nir_op alu_op, bool limit_rhs,
                     bool invert_cond, unsigned execution_mode)
{
   /* nir_op_isub should have been lowered away by this point */
   assert(alu->op != nir_op_isub);

   /* Make sure the alu type for our induction variable is compatible with the
    * conditional alus input type. If it's not, something has gone really wrong.
    */
   nir_alu_type induction_base_type =
      nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type);
   if (induction_base_type == nir_type_int || induction_base_type == nir_type_uint) {
      assert(nir_alu_type_get_base_type(nir_op_infos[alu_op].input_types[1]) == nir_type_int ||
             nir_alu_type_get_base_type(nir_op_infos[alu_op].input_types[1]) == nir_type_uint);
   }
   /* [extraction gap: else-branch opener lost] */
      assert(nir_alu_type_get_base_type(nir_op_infos[alu_op].input_types[0]) ==
             induction_base_type);

   /* Check for unsupported alu operations */
   if (alu->op != nir_op_iadd && alu->op != nir_op_fadd)
      /* [extraction gap: return statement missing] */

   /* do-while loops can increment the starting value before the condition is
    * checked. [extraction gap: middle of this comment lost]
    *
    * Here we check if the induction variable is used directly by the loop
    * condition and if so we assume we need to step the initial value.
    */
   unsigned trip_offset = 0;
   nir_alu_instr *cond_alu = nir_instr_as_alu(cond.def->parent_instr);
   if (cond_alu->src[0].src.ssa == &alu->dest.dest.ssa ||
       cond_alu->src[1].src.ssa == &alu->dest.dest.ssa) {
      /* [extraction gap: body missing — presumably trip_offset = 1] */
   }

   assert(nir_src_bit_size(alu->src[0].src) ==
          nir_src_bit_size(alu->src[1].src));
   unsigned bit_size = nir_src_bit_size(alu->src[0].src);

   /* get_iteration works under assumption that iterator will be
    * incremented or decremented until it hits the limit,
    * however if the loop condition is false on the first iteration
    * get_iteration's assumption is broken. Handle such loops first.
    */
   if (will_break_on_first_iteration(step, induction_base_type, trip_offset,
                                     alu_op, bit_size, initial,
                                     limit, limit_rhs, invert_cond,
                                     /* [extraction gap: argument and
                                      * early-return body missing] */

   int iter_int = get_iteration(alu_op, initial, step, limit, bit_size,
                                /* [extraction gap: argument missing] */);

   /* If iter_int is negative the loop is ill-formed or the conditional is
    * unsigned with a huge iteration count so don't bother going any further.
    */
   /* [extraction gap: the corresponding check/return was lost] */

   /* An explanation from the GLSL unrolling pass:
    *
    * Make sure that the calculated number of iterations satisfies the exit
    * condition. This is needed to catch off-by-one errors and some types of
    * ill-formed loops. For example, we need to detect that the following
    * loop does not have a maximum iteration count.
    *
    *    for (float x = 0.0; x != 0.9; x += 0.2);
    */
   for (int bias = -1; bias <= 1; bias++) {
      const int iter_bias = iter_int + bias;

      if (test_iterations(iter_bias, step, limit, alu_op, bit_size,
                          induction_base_type, initial,
                          limit_rhs, invert_cond, execution_mode)) {
         return iter_bias > 0 ? iter_bias - trip_offset : iter_bias;
      }
   }

   /* [extraction gap: final return missing — presumably "return -1;"] */
}
/* Returns the inverse of a comparison opcode (used when the loop exits on
 * the negation of a condition). [extraction gap: the return type line and
 * the entire opcode-mapping switch body were lost; only the fallback
 * remains — verify upstream]
 */
inverse_comparison(nir_op alu_op)
{
      unreachable("Unsuported comparison!");
}
/* A terminator condition is analyzable when it is a two-input ALU
 * comparison. [extraction gap: return type line missing]
 */
is_supported_terminator_condition(nir_ssa_scalar cond)
{
   if (!nir_ssa_scalar_is_alu(cond))
      /* [extraction gap: return statement missing] */

   nir_alu_instr *alu = nir_instr_as_alu(cond.def->parent_instr);
   return nir_alu_instr_is_comparison(alu) &&
          nir_op_infos[alu->op].num_inputs == 2;
}
/* Determines which side of a comparison is the basic induction variable
 * and which is the limit. [extraction gap: the return type line and two
 * parameter lines (an induction out-param and a limit-side flag, per the
 * call sites passing ind/limit_rhs) were lost]
 */
get_induction_and_limit_vars(nir_ssa_scalar cond,
                             nir_ssa_scalar *limit,
                             loop_info_state *state)
{
   nir_ssa_scalar rhs, lhs;
   lhs = nir_ssa_scalar_chase_alu_src(cond, 0);
   rhs = nir_ssa_scalar_chase_alu_src(cond, 1);

   if (get_loop_var(lhs.def, state)->type == basic_induction) {
      /* [extraction gap: body lost — presumably records lhs as induction,
       * rhs as limit] */
   } else if (get_loop_var(rhs.def, state)->type == basic_induction) {
      /* [extraction gap: body lost — presumably records rhs as induction,
       * lhs as limit] */
   }
   /* [extraction gap: final return missing] */
}
/* Handles loops exiting on (x && y) == 0 (spelled as ieq-with-zero or inot
 * of an iand): looks inside the iand for a supported comparison containing
 * a basic induction variable. [extraction gap: the return type line and
 * two parameter lines (induction out-param, limit-side flag) were lost]
 */
try_find_trip_count_vars_in_iand(nir_ssa_scalar *cond,
                                 nir_ssa_scalar *limit,
                                 loop_info_state *state)
{
   const nir_op alu_op = nir_ssa_scalar_alu_op(*cond);
   assert(alu_op == nir_op_ieq || alu_op == nir_op_inot);

   nir_ssa_scalar iand = nir_ssa_scalar_chase_alu_src(*cond, 0);

   if (alu_op == nir_op_ieq) {
      nir_ssa_scalar zero = nir_ssa_scalar_chase_alu_src(*cond, 1);

      if (!nir_ssa_scalar_is_alu(iand) || !nir_ssa_scalar_is_const(zero)) {
         /* Maybe we had it the wrong way, flip things around */
         nir_ssa_scalar tmp = zero;
         /* [extraction gap: the rest of the swap was lost] */
      }

      /* If we still didn't find what we need then return */
      if (!nir_ssa_scalar_is_const(zero))
         /* [extraction gap: return statement missing] */

      /* If the loop is not breaking on (x && y) == 0 then return */
      if (nir_ssa_scalar_as_uint(zero) != 0)
         /* [extraction gap: return statement missing] */
   }

   if (!nir_ssa_scalar_is_alu(iand))
      /* [extraction gap: return statement missing] */

   if (nir_ssa_scalar_alu_op(iand) != nir_op_iand)
      /* [extraction gap: return statement missing] */

   /* Check if iand src is a terminator condition and try to get the
    * induction var and trip limit var.
    */
   bool found_induction_var = false;
   for (unsigned i = 0; i < 2; i++) {
      nir_ssa_scalar src = nir_ssa_scalar_chase_alu_src(iand, i);
      if (is_supported_terminator_condition(src) &&
          get_induction_and_limit_vars(src, ind, limit, limit_rhs, state)) {
         /* [extraction gap: line lost — presumably updates *cond to the
          * matched comparison] */
         found_induction_var = true;

         /* If we've found one with a constant limit, stop. */
         if (nir_ssa_scalar_is_const(*limit))
            /* [extraction gap: statement missing — presumably break] */
      }
   }

   return found_induction_var;
}
/* Run through each of the terminators of the loop and try to infer a possible
 * trip-count. We need to check them all, and set the lowest trip-count as the
 * trip-count of our loop. If one of the terminators has an undecidable
 * trip-count we can not safely assume anything about the duration of the
 * loop. [extraction gap: tail of comment and return type line lost]
 */
find_trip_count(loop_info_state *state, unsigned execution_mode)
{
   bool trip_count_known = true;
   bool guessed_trip_count = false;
   nir_loop_terminator *limiting_terminator = NULL;
   int max_trip_count = -1;

   list_for_each_entry(nir_loop_terminator, terminator,
                       &state->loop->info->loop_terminator_list,
                       loop_terminator_link) {
      assert(terminator->nif->condition.is_ssa);
      nir_ssa_scalar cond = { terminator->nif->condition.ssa, 0 };

      if (!nir_ssa_scalar_is_alu(cond)) {
         /* If we get here the loop is dead and will get cleaned up by the
          * nir_opt_dead_cf pass.
          */
         trip_count_known = false;
         /* [extraction gap: statement missing — presumably continue] */
      }

      nir_op alu_op = nir_ssa_scalar_alu_op(cond);

      /* [extraction gap: limit_rhs declaration line lost] */
      nir_ssa_scalar basic_ind = { NULL, 0 };
      nir_ssa_scalar limit;
      if ((alu_op == nir_op_inot || alu_op == nir_op_ieq) &&
          try_find_trip_count_vars_in_iand(&cond, &basic_ind, &limit,
                                           &limit_rhs, state)) {

         /* The loop is exiting on (x && y) == 0 so we need to get the
          * inverse of x or y (i.e. which ever contained the induction var) in
          * order to compute the trip count.
          */
         alu_op = inverse_comparison(nir_ssa_scalar_alu_op(cond));
         trip_count_known = false;
         terminator->exact_trip_count_unknown = true;
      }

      if (!basic_ind.def) {
         if (is_supported_terminator_condition(cond)) {
            get_induction_and_limit_vars(cond, &basic_ind,
                                         &limit, &limit_rhs, state);
         }
      }

      /* The comparison has to have a basic induction variable for us to be
       * able to find trip counts.
       */
      if (!basic_ind.def) {
         trip_count_known = false;
         /* [extraction gap: statement missing — presumably continue] */
      }

      terminator->induction_rhs = !limit_rhs;

      /* Attempt to find a constant limit for the loop */
      nir_const_value limit_val;
      if (nir_ssa_scalar_is_const(limit)) {
         limit_val = nir_ssa_scalar_as_const_value(limit);
      }
      /* [extraction gap: else-branch opener lost] */
         trip_count_known = false;

         if (!try_find_limit_of_alu(limit, &limit_val, terminator, state)) {
            /* Guess loop limit based on array access */
            if (!guess_loop_limit(state, &limit_val, basic_ind)) {
               /* [extraction gap: statement missing — presumably continue] */
            }

            guessed_trip_count = true;
         }

      /* We have determined that we have the following constants:
       * (With the typical int i = 0; i < x; i++; as an example)
       *   [extraction gap: limit/initial bullet lines lost]
       *   - Step / iteration size
       * That's all that's needed to calculate the trip-count
       */

      nir_basic_induction_var *ind_var =
         get_loop_var(basic_ind.def, state)->ind;

      /* The basic induction var might be a vector but, because we guarantee
       * earlier that the phi source has a scalar swizzle, we can take the
       * component from basic_ind.
       */
      nir_ssa_scalar initial_s = { ind_var->def_outside_loop, basic_ind.comp };
      nir_ssa_scalar alu_s = { &ind_var->alu->dest.dest.ssa, basic_ind.comp };

      nir_const_value initial_val = nir_ssa_scalar_as_const_value(initial_s);

      /* We are guaranteed by earlier code that at least one of these sources
       * is a constant but we don't know which.
       */
      nir_const_value step_val;
      memset(&step_val, 0, sizeof(step_val));
      UNUSED bool found_step_value = false;
      assert(nir_op_infos[ind_var->alu->op].num_inputs == 2);
      for (unsigned i = 0; i < 2; i++) {
         nir_ssa_scalar alu_src = nir_ssa_scalar_chase_alu_src(alu_s, i);
         if (nir_ssa_scalar_is_const(alu_src)) {
            found_step_value = true;
            step_val = nir_ssa_scalar_as_const_value(alu_src);
            /* [extraction gap: statement missing — presumably break] */
         }
      }
      assert(found_step_value);

      int iterations = calculate_iterations(initial_val, step_val, limit_val,
                                            /* [extraction gap: several
                                             * argument lines lost] */
                                            terminator->continue_from_then,
                                            /* [extraction gap: trailing
                                             * argument(s) lost] */);

      /* Where we were not able to calculate the iteration count */
      if (iterations == -1) {
         trip_count_known = false;
         guessed_trip_count = false;
         /* [extraction gap: statement missing — presumably continue] */
      }

      if (guessed_trip_count) {
         guessed_trip_count = false;
         /* Keep the smallest guess across terminators. */
         if (state->loop->info->guessed_trip_count == 0 ||
             state->loop->info->guessed_trip_count > iterations)
            state->loop->info->guessed_trip_count = iterations;

         /* [extraction gap: statement missing — presumably continue] */
      }

      /* If this is the first run or we have found a smaller amount of
       * iterations than previously (we have identified a more limiting
       * terminator) set the trip count and limiting terminator.
       */
      if (max_trip_count == -1 || iterations < max_trip_count) {
         max_trip_count = iterations;
         limiting_terminator = terminator;
      }
   }

   state->loop->info->exact_trip_count_known = trip_count_known;
   if (max_trip_count > -1)
      state->loop->info->max_trip_count = max_trip_count;
   state->loop->info->limiting_terminator = limiting_terminator;
}
/* Decides whether an induction-indexed deref justifies forcing an unroll:
 * either the array size matches the trip count, or the deref's mode is in
 * the caller-provided indirect mask.
 * [extraction gap: return type line missing]
 */
force_unroll_array_access(loop_info_state *state, nir_deref_instr *deref)
{
   unsigned array_size = find_array_access_via_induction(state, deref, NULL);
   /* [extraction gap: an enclosing guard on array_size presumably lost] */
   if (array_size == state->loop->info->max_trip_count)
      /* [extraction gap: return statement missing] */

   if (deref->mode & state->indirect_mask)
      /* [extraction gap: return statement missing] */
   /* [extraction gap: final return missing] */
}
/* Scans a block for deref load/store/copy intrinsics whose array index is
 * an induction variable; such accesses become constant-indexed after
 * unrolling, so they argue for forcing the unroll.
 * [extraction gap: return type line missing]
 */
force_unroll_heuristics(loop_info_state *state, nir_block *block)
{
   nir_foreach_instr(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         /* [extraction gap: statement missing — presumably continue] */

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      /* Check for arrays variably-indexed by a loop induction variable.
       * Unrolling the loop may convert that access into constant-indexing.
       */
      if (intrin->intrinsic == nir_intrinsic_load_deref ||
          intrin->intrinsic == nir_intrinsic_store_deref ||
          intrin->intrinsic == nir_intrinsic_copy_deref) {
         if (force_unroll_array_access(state,
                                       nir_src_as_deref(intrin->src[0])))
            /* [extraction gap: return statement missing] */

         /* copy_deref has a second deref source to check. */
         if (intrin->intrinsic == nir_intrinsic_copy_deref &&
             force_unroll_array_access(state,
                                       nir_src_as_deref(intrin->src[1])))
            /* [extraction gap: return statement missing] */
      }
   }
   /* [extraction gap: final return missing] */
}
/* Top-level analysis driver for a single loop: initializes per-def records,
 * seeds the work list, finds terminators, computes invariance and induction
 * info, derives trip counts and applies force-unroll heuristics.
 * [extraction gap: return type line missing]
 */
get_loop_info(loop_info_state *state, nir_function_impl *impl)
{
   nir_shader *shader = impl->function->shader;
   const nir_shader_compiler_options *options = shader->options;

   /* Initialize all variables to "outside_loop". This also marks defs
    * invariant and constant if they are nir_instr_type_load_consts
    */
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block)
         nir_foreach_ssa_def(instr, initialize_ssa_def, state);
   }

   /* Add all entries in the outermost part of the loop to the processing list
    * Mark the entries in conditionals or in nested loops accordingly
    */
   foreach_list_typed_safe(nir_cf_node, node, node, &state->loop->body) {
      switch (node->type) {

      case nir_cf_node_block:
         init_loop_block(nir_cf_node_as_block(node), state,
                         false, false, options);
         /* [extraction gap: break missing] */

      case nir_cf_node_if:
         nir_foreach_block_in_cf_node(block, node)
            init_loop_block(block, state, true, false, options);
         /* [extraction gap: break missing] */

      case nir_cf_node_loop:
         nir_foreach_block_in_cf_node(block, node) {
            init_loop_block(block, state, false, true, options);
         }
         /* [extraction gap: break missing] */

      case nir_cf_node_function:
         /* [extraction gap: case body lost] */
      }
   }

   /* Try to find all simple terminators of the loop. If we can't find any,
    * or we find possible terminators that have side effects then bail.
    */
   if (!find_loop_terminators(state)) {
      /* Tear down any partially-collected terminators before bailing. */
      list_for_each_entry_safe(nir_loop_terminator, terminator,
                               &state->loop->info->loop_terminator_list,
                               loop_terminator_link) {
         list_del(&terminator->loop_terminator_link);
         ralloc_free(terminator);
      }
      /* [extraction gap: return statement missing] */
   }

   /* Induction analysis needs invariance information so get that first */
   compute_invariance_information(state);

   /* We have invariance information so try to find induction variables */
   if (!compute_induction_information(state))
      /* [extraction gap: return statement missing] */

   /* Run through each of the terminators and try to compute a trip-count */
   find_trip_count(state,
                   impl->function->shader->info.float_controls_execution_mode);

   nir_foreach_block_in_cf_node(block, &state->loop->cf_node) {
      if (force_unroll_heuristics(state, block)) {
         state->loop->info->force_unroll = true;
         /* [extraction gap: statement missing — presumably break] */
      }
   }
}
/* Allocates a fresh analysis context on mem_ctx and a fresh nir_loop_info
 * on the loop (discarding any previous one).
 */
static loop_info_state *
initialize_loop_info_state(nir_loop *loop, void *mem_ctx,
                           nir_function_impl *impl)
{
   loop_info_state *state = rzalloc(mem_ctx, loop_info_state);
   state->loop_vars = rzalloc_array(mem_ctx, nir_loop_variable,
                                    /* [extraction gap: element-count
                                     * argument missing] */);
   /* [extraction gap: line presumably storing loop into state lost] */

   list_inithead(&state->process_list);

   /* [extraction gap: a guard around the free below presumably lost] */
   ralloc_free(loop->info);

   loop->info = rzalloc(loop, nir_loop_info);

   list_inithead(&loop->info->loop_terminator_list);

   /* [extraction gap: return statement missing — presumably
    * "return state;"] */
}
/* Recursively descends the CF tree; for each loop node, innermost loops are
 * processed first (via the recursion), then this loop itself is analyzed.
 * [extraction gap: return type line missing]
 */
process_loops(nir_cf_node *cf_node, nir_variable_mode indirect_mask)
{
   switch (cf_node->type) {
   case nir_cf_node_block:
      /* [extraction gap: statement missing — presumably return] */
   case nir_cf_node_if: {
      nir_if *if_stmt = nir_cf_node_as_if(cf_node);
      foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->then_list)
         process_loops(nested_node, indirect_mask);
      foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->else_list)
         process_loops(nested_node, indirect_mask);
      /* [extraction gap: statement missing — presumably return] */
   }
   case nir_cf_node_loop: {
      nir_loop *loop = nir_cf_node_as_loop(cf_node);
      foreach_list_typed(nir_cf_node, nested_node, node, &loop->body)
         process_loops(nested_node, indirect_mask);
      /* [extraction gap: statement missing — presumably break, falling
       * through to the analysis below] */
   }
   /* [extraction gap: default label lost] */
      unreachable("unknown cf node type");
   }

   /* Only loop nodes reach this point: analyze this loop itself. */
   nir_loop *loop = nir_cf_node_as_loop(cf_node);
   nir_function_impl *impl = nir_cf_node_get_function(cf_node);
   void *mem_ctx = ralloc_context(NULL);

   loop_info_state *state = initialize_loop_info_state(loop, mem_ctx, impl);
   state->indirect_mask = indirect_mask;

   get_loop_info(state, impl);

   /* All transient analysis data hangs off mem_ctx; the nir_loop_info
    * results survive because they are allocated on the loop itself.
    */
   ralloc_free(mem_ctx);
}
/* Public entry point: (re)indexes SSA defs — required by get_loop_var's
 * index-based lookup — then analyzes every loop in the function body.
 * [extraction gap: return type line missing]
 */
nir_loop_analyze_impl(nir_function_impl *impl,
                      nir_variable_mode indirect_mask)
{
   nir_index_ssa_defs(impl);
   foreach_list_typed(nir_cf_node, node, node, &impl->body)
      process_loops(node, indirect_mask);
}