2 * Copyright © 2015 Thomas Helland
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 #include "nir_constant_expressions.h"
26 #include "nir_loop_analyze.h"
33 } nir_loop_variable_type
;
35 struct nir_basic_induction_var
;
38 /* A link for the work list */
39 struct list_head process_link
;
43 /* The ssa_def associated with this info */
46 /* The type of this ssa_def */
47 nir_loop_variable_type type
;
49 /* If this is of type basic_induction */
50 struct nir_basic_induction_var
*ind
;
52 /* True if variable is in an if branch */
55 /* True if variable is in a nested loop */
60 typedef struct nir_basic_induction_var
{
61 nir_op alu_op
; /* The type of alu-operation */
62 nir_loop_variable
*alu_def
; /* The def of the alu-operation */
63 nir_loop_variable
*invariant
; /* The invariant alu-operand */
64 nir_loop_variable
*def_outside_loop
; /* The phi-src outside the loop */
65 } nir_basic_induction_var
;
68 /* The loop we store information for */
71 /* Loop_variable for all ssa_defs in function */
72 nir_loop_variable
*loop_vars
;
74 /* A list of the loop_vars to analyze */
75 struct list_head process_list
;
77 nir_variable_mode indirect_mask
;
81 static nir_loop_variable
*
82 get_loop_var(nir_ssa_def
*value
, loop_info_state
*state
)
84 return &(state
->loop_vars
[value
->index
]);
88 loop_info_state
*state
;
94 init_loop_def(nir_ssa_def
*def
, void *void_init_loop_state
)
96 init_loop_state
*loop_init_state
= void_init_loop_state
;
97 nir_loop_variable
*var
= get_loop_var(def
, loop_init_state
->state
);
99 if (loop_init_state
->in_nested_loop
) {
100 var
->in_nested_loop
= true;
101 } else if (loop_init_state
->in_if_branch
) {
102 var
->in_if_branch
= true;
104 /* Add to the tail of the list. That way we start at the beginning of
105 * the defs in the loop instead of the end when walking the list. This
106 * means less recursive calls. Only add defs that are not in nested
107 * loops or conditional blocks.
109 list_addtail(&var
->process_link
, &loop_init_state
->state
->process_list
);
117 /** Calculate an estimated cost in number of instructions
119 * We do this so that we don't unroll loops which will later get massively
120 * inflated due to int64 or fp64 lowering. The estimates provided here don't
121 * have to be massively accurate; they just have to be good enough that loop
122 * unrolling doesn't cause things to blow up too much.
125 instr_cost(nir_instr
*instr
, const nir_shader_compiler_options
*options
)
127 if (instr
->type
== nir_instr_type_intrinsic
||
128 instr
->type
== nir_instr_type_tex
)
131 if (instr
->type
!= nir_instr_type_alu
)
134 nir_alu_instr
*alu
= nir_instr_as_alu(instr
);
135 const nir_op_info
*info
= &nir_op_infos
[alu
->op
];
137 /* Assume everything 16 or 32-bit is cheap.
139 * There are no 64-bit ops that don't have a 64-bit thing as their
140 * destination or first source.
142 if (nir_dest_bit_size(alu
->dest
.dest
) < 64 &&
143 nir_src_bit_size(alu
->src
[0].src
) < 64)
146 bool is_fp64
= nir_dest_bit_size(alu
->dest
.dest
) == 64 &&
147 nir_alu_type_get_base_type(info
->output_type
) == nir_type_float
;
148 for (unsigned i
= 0; i
< info
->num_inputs
; i
++) {
149 if (nir_src_bit_size(alu
->src
[i
].src
) == 64 &&
150 nir_alu_type_get_base_type(info
->input_types
[i
]) == nir_type_float
)
155 /* If it's something lowered normally, it's expensive. */
157 if (options
->lower_doubles_options
&
158 nir_lower_doubles_op_to_options_mask(alu
->op
))
161 /* If it's full software, it's even more expensive */
162 if (options
->lower_doubles_options
& nir_lower_fp64_full_software
)
167 if (options
->lower_int64_options
&
168 nir_lower_int64_op_to_options_mask(alu
->op
)) {
169 /* These require a doing the division algorithm. */
170 if (alu
->op
== nir_op_idiv
|| alu
->op
== nir_op_udiv
||
171 alu
->op
== nir_op_imod
|| alu
->op
== nir_op_umod
||
172 alu
->op
== nir_op_irem
)
175 /* Other int64 lowering isn't usually all that expensive */
184 init_loop_block(nir_block
*block
, loop_info_state
*state
,
185 bool in_if_branch
, bool in_nested_loop
,
186 const nir_shader_compiler_options
*options
)
188 init_loop_state init_state
= {.in_if_branch
= in_if_branch
,
189 .in_nested_loop
= in_nested_loop
,
192 nir_foreach_instr(instr
, block
) {
193 state
->loop
->info
->instr_cost
+= instr_cost(instr
, options
);
194 nir_foreach_ssa_def(instr
, init_loop_def
, &init_state
);
201 is_var_alu(nir_loop_variable
*var
)
203 return var
->def
->parent_instr
->type
== nir_instr_type_alu
;
207 is_var_constant(nir_loop_variable
*var
)
209 return var
->def
->parent_instr
->type
== nir_instr_type_load_const
;
213 is_var_phi(nir_loop_variable
*var
)
215 return var
->def
->parent_instr
->type
== nir_instr_type_phi
;
219 mark_invariant(nir_ssa_def
*def
, loop_info_state
*state
)
221 nir_loop_variable
*var
= get_loop_var(def
, state
);
223 if (var
->type
== invariant
)
227 var
->type
= invariant
;
231 if (var
->type
== not_invariant
)
234 if (is_var_alu(var
)) {
235 nir_alu_instr
*alu
= nir_instr_as_alu(def
->parent_instr
);
237 for (unsigned i
= 0; i
< nir_op_infos
[alu
->op
].num_inputs
; i
++) {
238 if (!mark_invariant(alu
->src
[i
].src
.ssa
, state
)) {
239 var
->type
= not_invariant
;
243 var
->type
= invariant
;
247 /* Phis shouldn't be invariant except if one operand is invariant, and the
248 * other is the phi itself. These should be removed by opt_remove_phis.
249 * load_consts are already set to invariant and constant during init,
250 * and so should return earlier. Remaining op_codes are set undefined.
252 var
->type
= not_invariant
;
257 compute_invariance_information(loop_info_state
*state
)
259 /* An expression is invariant in a loop L if:
262 * – it’s a variable use, all of whose single defs are outside of L
264 * – it’s a pure computation all of whose args are loop invariant
265 * – it’s a variable use whose single reaching def, and the
266 * rhs of that def is loop-invariant
268 list_for_each_entry_safe(nir_loop_variable
, var
, &state
->process_list
,
270 assert(!var
->in_if_branch
&& !var
->in_nested_loop
);
272 if (mark_invariant(var
->def
, state
))
273 list_del(&var
->process_link
);
277 /* If all of the instruction sources point to identical ALU instructions (as
278 * per nir_instrs_equal), return one of the ALU instructions. Otherwise,
281 static nir_alu_instr
*
282 phi_instr_as_alu(nir_phi_instr
*phi
)
284 nir_alu_instr
*first
= NULL
;
285 nir_foreach_phi_src(src
, phi
) {
286 assert(src
->src
.is_ssa
);
287 if (src
->src
.ssa
->parent_instr
->type
!= nir_instr_type_alu
)
290 nir_alu_instr
*alu
= nir_instr_as_alu(src
->src
.ssa
->parent_instr
);
294 if (!nir_instrs_equal(&first
->instr
, &alu
->instr
))
303 compute_induction_information(loop_info_state
*state
)
305 bool found_induction_var
= false;
306 list_for_each_entry_safe(nir_loop_variable
, var
, &state
->process_list
,
309 /* It can't be an induction variable if it is invariant. Invariants and
310 * things in nested loops or conditionals should have been removed from
311 * the list by compute_invariance_information().
313 assert(!var
->in_if_branch
&& !var
->in_nested_loop
&&
314 var
->type
!= invariant
);
316 /* We are only interested in checking phis for the basic induction
317 * variable case as its simple to detect. All basic induction variables
320 if (!is_var_phi(var
))
323 nir_phi_instr
*phi
= nir_instr_as_phi(var
->def
->parent_instr
);
324 nir_basic_induction_var
*biv
= rzalloc(state
, nir_basic_induction_var
);
326 nir_foreach_phi_src(src
, phi
) {
327 nir_loop_variable
*src_var
= get_loop_var(src
->src
.ssa
, state
);
329 /* If one of the sources is in an if branch or nested loop then don't
330 * attempt to go any further.
332 if (src_var
->in_if_branch
|| src_var
->in_nested_loop
)
335 /* Detect inductions variables that are incremented in both branches
336 * of an unnested if rather than in a loop block.
338 if (is_var_phi(src_var
)) {
339 nir_phi_instr
*src_phi
=
340 nir_instr_as_phi(src_var
->def
->parent_instr
);
341 nir_alu_instr
*src_phi_alu
= phi_instr_as_alu(src_phi
);
343 src_var
= get_loop_var(&src_phi_alu
->dest
.dest
.ssa
, state
);
344 if (!src_var
->in_if_branch
)
349 if (!src_var
->in_loop
) {
350 biv
->def_outside_loop
= src_var
;
351 } else if (is_var_alu(src_var
)) {
352 nir_alu_instr
*alu
= nir_instr_as_alu(src_var
->def
->parent_instr
);
354 if (nir_op_infos
[alu
->op
].num_inputs
== 2) {
355 biv
->alu_def
= src_var
;
356 biv
->alu_op
= alu
->op
;
358 for (unsigned i
= 0; i
< 2; i
++) {
359 /* Is one of the operands const, and the other the phi */
360 if (alu
->src
[i
].src
.ssa
->parent_instr
->type
== nir_instr_type_load_const
&&
361 alu
->src
[1-i
].src
.ssa
== &phi
->dest
.ssa
)
362 biv
->invariant
= get_loop_var(alu
->src
[i
].src
.ssa
, state
);
368 if (biv
->alu_def
&& biv
->def_outside_loop
&& biv
->invariant
&&
369 is_var_constant(biv
->def_outside_loop
)) {
370 assert(is_var_constant(biv
->invariant
));
371 biv
->alu_def
->type
= basic_induction
;
372 biv
->alu_def
->ind
= biv
;
373 var
->type
= basic_induction
;
376 found_induction_var
= true;
381 return found_induction_var
;
385 initialize_ssa_def(nir_ssa_def
*def
, void *void_state
)
387 loop_info_state
*state
= void_state
;
388 nir_loop_variable
*var
= get_loop_var(def
, state
);
390 var
->in_loop
= false;
393 if (def
->parent_instr
->type
== nir_instr_type_load_const
) {
394 var
->type
= invariant
;
396 var
->type
= undefined
;
403 find_loop_terminators(loop_info_state
*state
)
405 bool success
= false;
406 foreach_list_typed_safe(nir_cf_node
, node
, node
, &state
->loop
->body
) {
407 if (node
->type
== nir_cf_node_if
) {
408 nir_if
*nif
= nir_cf_node_as_if(node
);
410 nir_block
*break_blk
= NULL
;
411 nir_block
*continue_from_blk
= NULL
;
412 bool continue_from_then
= true;
414 nir_block
*last_then
= nir_if_last_then_block(nif
);
415 nir_block
*last_else
= nir_if_last_else_block(nif
);
416 if (nir_block_ends_in_break(last_then
)) {
417 break_blk
= last_then
;
418 continue_from_blk
= last_else
;
419 continue_from_then
= false;
420 } else if (nir_block_ends_in_break(last_else
)) {
421 break_blk
= last_else
;
422 continue_from_blk
= last_then
;
425 /* If there is a break then we should find a terminator. If we can
426 * not find a loop terminator, but there is a break-statement then
427 * we should return false so that we do not try to find trip-count
429 if (!nir_is_trivial_loop_if(nif
, break_blk
)) {
430 state
->loop
->info
->complex_loop
= true;
434 /* Continue if the if contained no jumps at all */
438 if (nif
->condition
.ssa
->parent_instr
->type
== nir_instr_type_phi
) {
439 state
->loop
->info
->complex_loop
= true;
443 nir_loop_terminator
*terminator
=
444 rzalloc(state
->loop
->info
, nir_loop_terminator
);
446 list_addtail(&terminator
->loop_terminator_link
,
447 &state
->loop
->info
->loop_terminator_list
);
449 terminator
->nif
= nif
;
450 terminator
->break_block
= break_blk
;
451 terminator
->continue_from_block
= continue_from_blk
;
452 terminator
->continue_from_then
= continue_from_then
;
453 terminator
->conditional_instr
= nif
->condition
.ssa
->parent_instr
;
462 /* This function looks for an array access within a loop that uses an
463 * induction variable for the array index. If found it returns the size of the
464 * array, otherwise 0 is returned. If we find an induction var we pass it back
465 * to the caller via array_index_out.
468 find_array_access_via_induction(loop_info_state
*state
,
469 nir_deref_instr
*deref
,
470 nir_loop_variable
**array_index_out
)
472 for (nir_deref_instr
*d
= deref
; d
; d
= nir_deref_instr_parent(d
)) {
473 if (d
->deref_type
!= nir_deref_type_array
)
476 assert(d
->arr
.index
.is_ssa
);
477 nir_loop_variable
*array_index
= get_loop_var(d
->arr
.index
.ssa
, state
);
479 if (array_index
->type
!= basic_induction
)
483 *array_index_out
= array_index
;
485 nir_deref_instr
*parent
= nir_deref_instr_parent(d
);
486 if (glsl_type_is_array_or_matrix(parent
->type
)) {
487 return glsl_get_length(parent
->type
);
489 assert(glsl_type_is_vector(parent
->type
));
490 return glsl_get_vector_elements(parent
->type
);
498 guess_loop_limit(loop_info_state
*state
, nir_const_value
*limit_val
,
499 nir_loop_variable
*basic_ind
)
501 unsigned min_array_size
= 0;
503 nir_foreach_block_in_cf_node(block
, &state
->loop
->cf_node
) {
504 nir_foreach_instr(instr
, block
) {
505 if (instr
->type
!= nir_instr_type_intrinsic
)
508 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
510 /* Check for arrays variably-indexed by a loop induction variable. */
511 if (intrin
->intrinsic
== nir_intrinsic_load_deref
||
512 intrin
->intrinsic
== nir_intrinsic_store_deref
||
513 intrin
->intrinsic
== nir_intrinsic_copy_deref
) {
515 nir_loop_variable
*array_idx
= NULL
;
516 unsigned array_size
=
517 find_array_access_via_induction(state
,
518 nir_src_as_deref(intrin
->src
[0]),
520 if (basic_ind
== array_idx
&&
521 (min_array_size
== 0 || min_array_size
> array_size
)) {
522 min_array_size
= array_size
;
525 if (intrin
->intrinsic
!= nir_intrinsic_copy_deref
)
529 find_array_access_via_induction(state
,
530 nir_src_as_deref(intrin
->src
[1]),
532 if (basic_ind
== array_idx
&&
533 (min_array_size
== 0 || min_array_size
> array_size
)) {
534 min_array_size
= array_size
;
540 if (min_array_size
) {
541 limit_val
->i32
= min_array_size
;
549 try_find_limit_of_alu(nir_loop_variable
*limit
, nir_const_value
*limit_val
,
550 nir_loop_terminator
*terminator
, loop_info_state
*state
)
552 if(!is_var_alu(limit
))
555 nir_alu_instr
*limit_alu
= nir_instr_as_alu(limit
->def
->parent_instr
);
557 if (limit_alu
->op
== nir_op_imin
||
558 limit_alu
->op
== nir_op_fmin
) {
559 limit
= get_loop_var(limit_alu
->src
[0].src
.ssa
, state
);
561 if (!is_var_constant(limit
))
562 limit
= get_loop_var(limit_alu
->src
[1].src
.ssa
, state
);
564 if (!is_var_constant(limit
))
567 *limit_val
= nir_instr_as_load_const(limit
->def
->parent_instr
)->value
[0];
569 terminator
->exact_trip_count_unknown
= true;
578 get_iteration(nir_op cond_op
, nir_const_value
*initial
, nir_const_value
*step
,
579 nir_const_value
*limit
)
588 int32_t initial_val
= initial
->i32
;
589 int32_t span
= limit
->i32
- initial_val
;
590 iter
= span
/ step
->i32
;
595 uint32_t initial_val
= initial
->u32
;
596 uint32_t span
= limit
->u32
- initial_val
;
597 iter
= span
/ step
->u32
;
604 float initial_val
= initial
->f32
;
605 float span
= limit
->f32
- initial_val
;
606 iter
= span
/ step
->f32
;
617 test_iterations(int32_t iter_int
, nir_const_value
*step
,
618 nir_const_value
*limit
, nir_op cond_op
, unsigned bit_size
,
619 nir_alu_type induction_base_type
,
620 nir_const_value
*initial
, bool limit_rhs
, bool invert_cond
)
622 assert(nir_op_infos
[cond_op
].num_inputs
== 2);
624 nir_const_value iter_src
= {0, };
627 switch (induction_base_type
) {
629 iter_src
.f32
= (float) iter_int
;
630 mul_op
= nir_op_fmul
;
631 add_op
= nir_op_fadd
;
635 iter_src
.i32
= iter_int
;
636 mul_op
= nir_op_imul
;
637 add_op
= nir_op_iadd
;
640 unreachable("Unhandled induction variable base type!");
643 /* Multiple the iteration count we are testing by the number of times we
644 * step the induction variable each iteration.
646 nir_const_value
*mul_src
[2] = { &iter_src
, step
};
647 nir_const_value mul_result
;
648 nir_eval_const_opcode(mul_op
, &mul_result
, 1, bit_size
, mul_src
);
650 /* Add the initial value to the accumulated induction variable total */
651 nir_const_value
*add_src
[2] = { &mul_result
, initial
};
652 nir_const_value add_result
;
653 nir_eval_const_opcode(add_op
, &add_result
, 1, bit_size
, add_src
);
655 nir_const_value
*src
[2];
656 src
[limit_rhs
? 0 : 1] = &add_result
;
657 src
[limit_rhs
? 1 : 0] = limit
;
659 /* Evaluate the loop exit condition */
660 nir_const_value result
;
661 nir_eval_const_opcode(cond_op
, &result
, 1, bit_size
, src
);
663 return invert_cond
? !result
.b
: result
.b
;
667 calculate_iterations(nir_const_value
*initial
, nir_const_value
*step
,
668 nir_const_value
*limit
, nir_loop_variable
*alu_def
,
669 nir_alu_instr
*cond_alu
, nir_op alu_op
, bool limit_rhs
,
672 assert(initial
!= NULL
&& step
!= NULL
&& limit
!= NULL
);
674 nir_alu_instr
*alu
= nir_instr_as_alu(alu_def
->def
->parent_instr
);
676 /* nir_op_isub should have been lowered away by this point */
677 assert(alu
->op
!= nir_op_isub
);
679 /* Make sure the alu type for our induction variable is compatible with the
680 * conditional alus input type. If its not something has gone really wrong.
682 nir_alu_type induction_base_type
=
683 nir_alu_type_get_base_type(nir_op_infos
[alu
->op
].output_type
);
684 if (induction_base_type
== nir_type_int
|| induction_base_type
== nir_type_uint
) {
685 assert(nir_alu_type_get_base_type(nir_op_infos
[alu_op
].input_types
[1]) == nir_type_int
||
686 nir_alu_type_get_base_type(nir_op_infos
[alu_op
].input_types
[1]) == nir_type_uint
);
688 assert(nir_alu_type_get_base_type(nir_op_infos
[alu_op
].input_types
[0]) ==
689 induction_base_type
);
692 /* Check for nsupported alu operations */
693 if (alu
->op
!= nir_op_iadd
&& alu
->op
!= nir_op_fadd
)
696 /* do-while loops can increment the starting value before the condition is
703 * Here we check if the induction variable is used directly by the loop
704 * condition and if so we assume we need to step the initial value.
706 unsigned trip_offset
= 0;
707 if (cond_alu
->src
[0].src
.ssa
== alu_def
->def
||
708 cond_alu
->src
[1].src
.ssa
== alu_def
->def
) {
712 int iter_int
= get_iteration(alu_op
, initial
, step
, limit
);
714 /* If iter_int is negative the loop is ill-formed or is the conditional is
715 * unsigned with a huge iteration count so don't bother going any further.
720 /* An explanation from the GLSL unrolling pass:
722 * Make sure that the calculated number of iterations satisfies the exit
723 * condition. This is needed to catch off-by-one errors and some types of
724 * ill-formed loops. For example, we need to detect that the following
725 * loop does not have a maximum iteration count.
727 * for (float x = 0.0; x != 0.9; x += 0.2);
729 assert(nir_src_bit_size(alu
->src
[0].src
) ==
730 nir_src_bit_size(alu
->src
[1].src
));
731 unsigned bit_size
= nir_src_bit_size(alu
->src
[0].src
);
732 for (int bias
= -1; bias
<= 1; bias
++) {
733 const int iter_bias
= iter_int
+ bias
;
735 if (test_iterations(iter_bias
, step
, limit
, alu_op
, bit_size
,
736 induction_base_type
, initial
,
737 limit_rhs
, invert_cond
)) {
738 return iter_bias
> 0 ? iter_bias
- trip_offset
: iter_bias
;
746 inverse_comparison(nir_alu_instr
*alu
)
770 unreachable("Unsuported comparison!");
775 is_supported_terminator_condition(nir_alu_instr
*alu
)
777 return nir_alu_instr_is_comparison(alu
) &&
778 nir_op_infos
[alu
->op
].num_inputs
== 2;
782 get_induction_and_limit_vars(nir_alu_instr
*alu
, nir_loop_variable
**ind
,
783 nir_loop_variable
**limit
,
784 loop_info_state
*state
)
786 bool limit_rhs
= true;
788 /* We assume that the limit is the "right" operand */
789 *ind
= get_loop_var(alu
->src
[0].src
.ssa
, state
);
790 *limit
= get_loop_var(alu
->src
[1].src
.ssa
, state
);
792 if ((*ind
)->type
!= basic_induction
) {
793 /* We had it the wrong way, flip things around */
794 *ind
= get_loop_var(alu
->src
[1].src
.ssa
, state
);
795 *limit
= get_loop_var(alu
->src
[0].src
.ssa
, state
);
803 try_find_trip_count_vars_in_iand(nir_alu_instr
**alu
,
804 nir_loop_variable
**ind
,
805 nir_loop_variable
**limit
,
807 loop_info_state
*state
)
809 assert((*alu
)->op
== nir_op_ieq
|| (*alu
)->op
== nir_op_inot
);
811 nir_ssa_def
*iand_def
= (*alu
)->src
[0].src
.ssa
;
813 if ((*alu
)->op
== nir_op_ieq
) {
814 nir_ssa_def
*zero_def
= (*alu
)->src
[1].src
.ssa
;
816 if (iand_def
->parent_instr
->type
!= nir_instr_type_alu
||
817 zero_def
->parent_instr
->type
!= nir_instr_type_load_const
) {
819 /* Maybe we had it the wrong way, flip things around */
820 iand_def
= (*alu
)->src
[1].src
.ssa
;
821 zero_def
= (*alu
)->src
[0].src
.ssa
;
823 /* If we still didn't find what we need then return */
824 if (zero_def
->parent_instr
->type
!= nir_instr_type_load_const
)
828 /* If the loop is not breaking on (x && y) == 0 then return */
829 nir_const_value
*zero
=
830 nir_instr_as_load_const(zero_def
->parent_instr
)->value
;
831 if (zero
[0].i32
!= 0)
835 if (iand_def
->parent_instr
->type
!= nir_instr_type_alu
)
838 nir_alu_instr
*iand
= nir_instr_as_alu(iand_def
->parent_instr
);
839 if (iand
->op
!= nir_op_iand
)
842 /* Check if iand src is a terminator condition and try get induction var
843 * and trip limit var.
845 nir_ssa_def
*src
= iand
->src
[0].src
.ssa
;
846 if (src
->parent_instr
->type
== nir_instr_type_alu
) {
847 *alu
= nir_instr_as_alu(src
->parent_instr
);
848 if (is_supported_terminator_condition(*alu
))
849 *limit_rhs
= get_induction_and_limit_vars(*alu
, ind
, limit
, state
);
852 /* Try the other iand src if needed */
853 if (*ind
== NULL
|| (*ind
&& (*ind
)->type
!= basic_induction
) ||
854 !is_var_constant(*limit
)) {
855 src
= iand
->src
[1].src
.ssa
;
856 if (src
->parent_instr
->type
== nir_instr_type_alu
) {
857 nir_alu_instr
*tmp_alu
= nir_instr_as_alu(src
->parent_instr
);
858 if (is_supported_terminator_condition(tmp_alu
)) {
860 *limit_rhs
= get_induction_and_limit_vars(*alu
, ind
, limit
, state
);
866 /* Run through each of the terminators of the loop and try to infer a possible
867 * trip-count. We need to check them all, and set the lowest trip-count as the
868 * trip-count of our loop. If one of the terminators has an undecidable
869 * trip-count we can not safely assume anything about the duration of the
873 find_trip_count(loop_info_state
*state
)
875 bool trip_count_known
= true;
876 bool guessed_trip_count
= false;
877 nir_loop_terminator
*limiting_terminator
= NULL
;
878 int max_trip_count
= -1;
880 list_for_each_entry(nir_loop_terminator
, terminator
,
881 &state
->loop
->info
->loop_terminator_list
,
882 loop_terminator_link
) {
884 if (terminator
->conditional_instr
->type
!= nir_instr_type_alu
) {
885 /* If we get here the loop is dead and will get cleaned up by the
886 * nir_opt_dead_cf pass.
888 trip_count_known
= false;
892 nir_alu_instr
*alu
= nir_instr_as_alu(terminator
->conditional_instr
);
893 nir_op alu_op
= alu
->op
;
896 nir_loop_variable
*basic_ind
= NULL
;
897 nir_loop_variable
*limit
;
898 if (alu
->op
== nir_op_inot
|| alu
->op
== nir_op_ieq
) {
899 nir_alu_instr
*new_alu
= alu
;
900 try_find_trip_count_vars_in_iand(&new_alu
, &basic_ind
, &limit
,
903 /* The loop is exiting on (x && y) == 0 so we need to get the
904 * inverse of x or y (i.e. which ever contained the induction var) in
905 * order to compute the trip count.
907 if (basic_ind
&& basic_ind
->type
== basic_induction
) {
909 alu_op
= inverse_comparison(alu
);
910 trip_count_known
= false;
911 terminator
->exact_trip_count_unknown
= true;
916 if (!is_supported_terminator_condition(alu
)) {
917 trip_count_known
= false;
921 limit_rhs
= get_induction_and_limit_vars(alu
, &basic_ind
, &limit
,
925 /* The comparison has to have a basic induction variable for us to be
926 * able to find trip counts.
928 if (basic_ind
->type
!= basic_induction
) {
929 trip_count_known
= false;
933 terminator
->induction_rhs
= !limit_rhs
;
935 /* Attempt to find a constant limit for the loop */
936 nir_const_value limit_val
;
937 if (is_var_constant(limit
)) {
939 nir_instr_as_load_const(limit
->def
->parent_instr
)->value
[0];
941 trip_count_known
= false;
943 if (!try_find_limit_of_alu(limit
, &limit_val
, terminator
, state
)) {
944 /* Guess loop limit based on array access */
945 if (!guess_loop_limit(state
, &limit_val
, basic_ind
)) {
949 guessed_trip_count
= true;
953 /* We have determined that we have the following constants:
954 * (With the typical int i = 0; i < x; i++; as an example)
957 * - Step / iteration size
958 * Thats all thats needed to calculate the trip-count
961 nir_const_value
*initial_val
=
962 nir_instr_as_load_const(basic_ind
->ind
->def_outside_loop
->
963 def
->parent_instr
)->value
;
965 nir_const_value
*step_val
=
966 nir_instr_as_load_const(basic_ind
->ind
->invariant
->def
->
967 parent_instr
)->value
;
969 int iterations
= calculate_iterations(initial_val
, step_val
,
971 basic_ind
->ind
->alu_def
, alu
,
973 terminator
->continue_from_then
);
975 /* Where we not able to calculate the iteration count */
976 if (iterations
== -1) {
977 trip_count_known
= false;
978 guessed_trip_count
= false;
982 if (guessed_trip_count
) {
983 guessed_trip_count
= false;
984 if (state
->loop
->info
->guessed_trip_count
== 0 ||
985 state
->loop
->info
->guessed_trip_count
> iterations
)
986 state
->loop
->info
->guessed_trip_count
= iterations
;
991 /* If this is the first run or we have found a smaller amount of
992 * iterations than previously (we have identified a more limiting
993 * terminator) set the trip count and limiting terminator.
995 if (max_trip_count
== -1 || iterations
< max_trip_count
) {
996 max_trip_count
= iterations
;
997 limiting_terminator
= terminator
;
1001 state
->loop
->info
->exact_trip_count_known
= trip_count_known
;
1002 if (max_trip_count
> -1)
1003 state
->loop
->info
->max_trip_count
= max_trip_count
;
1004 state
->loop
->info
->limiting_terminator
= limiting_terminator
;
1008 force_unroll_array_access(loop_info_state
*state
, nir_deref_instr
*deref
)
1010 unsigned array_size
= find_array_access_via_induction(state
, deref
, NULL
);
1012 if (array_size
== state
->loop
->info
->max_trip_count
)
1015 if (deref
->mode
& state
->indirect_mask
)
1023 force_unroll_heuristics(loop_info_state
*state
, nir_block
*block
)
1025 nir_foreach_instr(instr
, block
) {
1026 if (instr
->type
!= nir_instr_type_intrinsic
)
1029 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
1031 /* Check for arrays variably-indexed by a loop induction variable.
1032 * Unrolling the loop may convert that access into constant-indexing.
1034 if (intrin
->intrinsic
== nir_intrinsic_load_deref
||
1035 intrin
->intrinsic
== nir_intrinsic_store_deref
||
1036 intrin
->intrinsic
== nir_intrinsic_copy_deref
) {
1037 if (force_unroll_array_access(state
,
1038 nir_src_as_deref(intrin
->src
[0])))
1041 if (intrin
->intrinsic
== nir_intrinsic_copy_deref
&&
1042 force_unroll_array_access(state
,
1043 nir_src_as_deref(intrin
->src
[1])))
1052 get_loop_info(loop_info_state
*state
, nir_function_impl
*impl
)
1054 nir_shader
*shader
= impl
->function
->shader
;
1055 const nir_shader_compiler_options
*options
= shader
->options
;
1057 /* Initialize all variables to "outside_loop". This also marks defs
1058 * invariant and constant if they are nir_instr_type_load_consts
1060 nir_foreach_block(block
, impl
) {
1061 nir_foreach_instr(instr
, block
)
1062 nir_foreach_ssa_def(instr
, initialize_ssa_def
, state
);
1065 /* Add all entries in the outermost part of the loop to the processing list
1066 * Mark the entries in conditionals or in nested loops accordingly
1068 foreach_list_typed_safe(nir_cf_node
, node
, node
, &state
->loop
->body
) {
1069 switch (node
->type
) {
1071 case nir_cf_node_block
:
1072 init_loop_block(nir_cf_node_as_block(node
), state
,
1073 false, false, options
);
1076 case nir_cf_node_if
:
1077 nir_foreach_block_in_cf_node(block
, node
)
1078 init_loop_block(block
, state
, true, false, options
);
1081 case nir_cf_node_loop
:
1082 nir_foreach_block_in_cf_node(block
, node
) {
1083 init_loop_block(block
, state
, false, true, options
);
1087 case nir_cf_node_function
:
1092 /* Try to find all simple terminators of the loop. If we can't find any,
1093 * or we find possible terminators that have side effects then bail.
1095 if (!find_loop_terminators(state
)) {
1096 list_for_each_entry_safe(nir_loop_terminator
, terminator
,
1097 &state
->loop
->info
->loop_terminator_list
,
1098 loop_terminator_link
) {
1099 list_del(&terminator
->loop_terminator_link
);
1100 ralloc_free(terminator
);
1105 /* Induction analysis needs invariance information so get that first */
1106 compute_invariance_information(state
);
1108 /* We have invariance information so try to find induction variables */
1109 if (!compute_induction_information(state
))
1112 /* Run through each of the terminators and try to compute a trip-count */
1113 find_trip_count(state
);
1115 nir_foreach_block_in_cf_node(block
, &state
->loop
->cf_node
) {
1116 if (force_unroll_heuristics(state
, block
)) {
1117 state
->loop
->info
->force_unroll
= true;
1123 static loop_info_state
*
1124 initialize_loop_info_state(nir_loop
*loop
, void *mem_ctx
,
1125 nir_function_impl
*impl
)
1127 loop_info_state
*state
= rzalloc(mem_ctx
, loop_info_state
);
1128 state
->loop_vars
= rzalloc_array(mem_ctx
, nir_loop_variable
,
1132 list_inithead(&state
->process_list
);
1135 ralloc_free(loop
->info
);
1137 loop
->info
= rzalloc(loop
, nir_loop_info
);
1139 list_inithead(&loop
->info
->loop_terminator_list
);
1145 process_loops(nir_cf_node
*cf_node
, nir_variable_mode indirect_mask
)
1147 switch (cf_node
->type
) {
1148 case nir_cf_node_block
:
1150 case nir_cf_node_if
: {
1151 nir_if
*if_stmt
= nir_cf_node_as_if(cf_node
);
1152 foreach_list_typed(nir_cf_node
, nested_node
, node
, &if_stmt
->then_list
)
1153 process_loops(nested_node
, indirect_mask
);
1154 foreach_list_typed(nir_cf_node
, nested_node
, node
, &if_stmt
->else_list
)
1155 process_loops(nested_node
, indirect_mask
);
1158 case nir_cf_node_loop
: {
1159 nir_loop
*loop
= nir_cf_node_as_loop(cf_node
);
1160 foreach_list_typed(nir_cf_node
, nested_node
, node
, &loop
->body
)
1161 process_loops(nested_node
, indirect_mask
);
1165 unreachable("unknown cf node type");
1168 nir_loop
*loop
= nir_cf_node_as_loop(cf_node
);
1169 nir_function_impl
*impl
= nir_cf_node_get_function(cf_node
);
1170 void *mem_ctx
= ralloc_context(NULL
);
1172 loop_info_state
*state
= initialize_loop_info_state(loop
, mem_ctx
, impl
);
1173 state
->indirect_mask
= indirect_mask
;
1175 get_loop_info(state
, impl
);
1177 ralloc_free(mem_ctx
);
1181 nir_loop_analyze_impl(nir_function_impl
*impl
,
1182 nir_variable_mode indirect_mask
)
1184 nir_index_ssa_defs(impl
);
1185 foreach_list_typed(nir_cf_node
, node
, node
, &impl
->body
)
1186 process_loops(node
, indirect_mask
);