/*
 * Copyright © 2018 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
/* This pass computes for each ssa definition if it is uniform.
 * That is, the variable has the same value for all invocations
 * of the group (e.g. subgroup).
 *
 * This divergence analysis pass expects the shader to be in LCSSA-form.
 *
 * This algorithm implements "The Simple Divergence Analysis" from
 * Diogo Sampaio, Rafael De Souza, Sylvain Collange, Fernando Magno Quintão Pereira.
 * Divergence Analysis. ACM Transactions on Programming Languages and Systems (TOPLAS),
 * ACM, 2013, 35 (4), pp.13:1-13:36. <10.1145/2523815>. <hal-00909072v2>
 */
39 struct divergence_state
{
40 const nir_divergence_options options
;
41 const gl_shader_stage stage
;
43 /** current control flow state */
44 /* True if some loop-active invocations might take a different control-flow path.
45 * A divergent break does not cause subsequent control-flow to be considered
46 * divergent because those invocations are no longer active in the loop.
47 * For a divergent if, both sides are considered divergent flow because
48 * the other side is still loop-active. */
49 bool divergent_loop_cf
;
50 /* True if a divergent continue happened since the loop header */
51 bool divergent_loop_continue
;
52 /* True if a divergent break happened since the loop header */
53 bool divergent_loop_break
;
55 /* True if we visit the block for the fist time */
/* Forward declaration: visit_cf_list() is mutually recursive with
 * visit_if() and visit_loop(). */
static bool
visit_cf_list(struct exec_list *list, struct divergence_state *state);
63 visit_alu(nir_alu_instr
*instr
)
65 if (instr
->dest
.dest
.ssa
.divergent
)
68 unsigned num_src
= nir_op_infos
[instr
->op
].num_inputs
;
70 for (unsigned i
= 0; i
< num_src
; i
++) {
71 if (instr
->src
[i
].src
.ssa
->divergent
) {
72 instr
->dest
.dest
.ssa
.divergent
= true;
81 visit_intrinsic(nir_intrinsic_instr
*instr
, struct divergence_state
*state
)
83 if (!nir_intrinsic_infos
[instr
->intrinsic
].has_dest
)
86 if (instr
->dest
.ssa
.divergent
)
89 nir_divergence_options options
= state
->options
;
90 gl_shader_stage stage
= state
->stage
;
91 bool is_divergent
= false;
92 switch (instr
->intrinsic
) {
93 /* Intrinsics which are always uniform */
94 case nir_intrinsic_shader_clock
:
95 case nir_intrinsic_ballot
:
96 case nir_intrinsic_read_invocation
:
97 case nir_intrinsic_read_first_invocation
:
98 case nir_intrinsic_vote_any
:
99 case nir_intrinsic_vote_all
:
100 case nir_intrinsic_vote_feq
:
101 case nir_intrinsic_vote_ieq
:
102 case nir_intrinsic_load_work_dim
:
103 case nir_intrinsic_load_work_group_id
:
104 case nir_intrinsic_load_num_work_groups
:
105 case nir_intrinsic_load_local_group_size
:
106 case nir_intrinsic_load_subgroup_id
:
107 case nir_intrinsic_load_num_subgroups
:
108 case nir_intrinsic_load_subgroup_size
:
109 case nir_intrinsic_load_subgroup_eq_mask
:
110 case nir_intrinsic_load_subgroup_ge_mask
:
111 case nir_intrinsic_load_subgroup_gt_mask
:
112 case nir_intrinsic_load_subgroup_le_mask
:
113 case nir_intrinsic_load_subgroup_lt_mask
:
114 case nir_intrinsic_first_invocation
:
115 case nir_intrinsic_load_base_instance
:
116 case nir_intrinsic_load_base_vertex
:
117 case nir_intrinsic_load_first_vertex
:
118 case nir_intrinsic_load_draw_id
:
119 case nir_intrinsic_load_is_indexed_draw
:
120 case nir_intrinsic_load_viewport_scale
:
121 case nir_intrinsic_load_alpha_ref_float
:
122 case nir_intrinsic_load_user_clip_plane
:
123 case nir_intrinsic_load_viewport_x_scale
:
124 case nir_intrinsic_load_viewport_y_scale
:
125 case nir_intrinsic_load_viewport_z_scale
:
126 case nir_intrinsic_load_viewport_offset
:
127 case nir_intrinsic_load_viewport_z_offset
:
128 case nir_intrinsic_load_blend_const_color_a_float
:
129 case nir_intrinsic_load_blend_const_color_b_float
:
130 case nir_intrinsic_load_blend_const_color_g_float
:
131 case nir_intrinsic_load_blend_const_color_r_float
:
132 case nir_intrinsic_load_blend_const_color_rgba
:
133 case nir_intrinsic_load_blend_const_color_aaaa8888_unorm
:
134 case nir_intrinsic_load_blend_const_color_rgba8888_unorm
:
135 is_divergent
= false;
138 /* Intrinsics with divergence depending on shader stage and hardware */
139 case nir_intrinsic_load_input
:
140 is_divergent
= instr
->src
[0].ssa
->divergent
;
141 if (stage
== MESA_SHADER_FRAGMENT
)
142 is_divergent
|= !(options
& nir_divergence_single_prim_per_subgroup
);
143 else if (stage
== MESA_SHADER_TESS_EVAL
)
144 is_divergent
|= !(options
& nir_divergence_single_patch_per_tes_subgroup
);
148 case nir_intrinsic_load_per_vertex_input
:
149 is_divergent
= instr
->src
[0].ssa
->divergent
||
150 instr
->src
[1].ssa
->divergent
;
151 if (stage
== MESA_SHADER_TESS_CTRL
)
152 is_divergent
|= !(options
& nir_divergence_single_patch_per_tcs_subgroup
);
153 if (stage
== MESA_SHADER_TESS_EVAL
)
154 is_divergent
|= !(options
& nir_divergence_single_patch_per_tes_subgroup
);
158 case nir_intrinsic_load_input_vertex
:
159 is_divergent
= instr
->src
[1].ssa
->divergent
;
160 assert(stage
== MESA_SHADER_FRAGMENT
);
161 is_divergent
|= !(options
& nir_divergence_single_prim_per_subgroup
);
163 case nir_intrinsic_load_output
:
164 assert(stage
== MESA_SHADER_TESS_CTRL
|| stage
== MESA_SHADER_FRAGMENT
);
165 is_divergent
= instr
->src
[0].ssa
->divergent
;
166 if (stage
== MESA_SHADER_TESS_CTRL
)
167 is_divergent
|= !(options
& nir_divergence_single_patch_per_tcs_subgroup
);
171 case nir_intrinsic_load_per_vertex_output
:
172 assert(stage
== MESA_SHADER_TESS_CTRL
);
173 is_divergent
= instr
->src
[0].ssa
->divergent
||
174 instr
->src
[1].ssa
->divergent
||
175 !(options
& nir_divergence_single_patch_per_tcs_subgroup
);
177 case nir_intrinsic_load_layer_id
:
178 case nir_intrinsic_load_front_face
:
179 assert(stage
== MESA_SHADER_FRAGMENT
);
180 is_divergent
= !(options
& nir_divergence_single_prim_per_subgroup
);
182 case nir_intrinsic_load_view_index
:
183 assert(stage
!= MESA_SHADER_COMPUTE
&& stage
!= MESA_SHADER_KERNEL
);
184 if (options
& nir_divergence_view_index_uniform
)
185 is_divergent
= false;
186 else if (stage
== MESA_SHADER_FRAGMENT
)
187 is_divergent
= !(options
& nir_divergence_single_prim_per_subgroup
);
189 case nir_intrinsic_load_fs_input_interp_deltas
:
190 assert(stage
== MESA_SHADER_FRAGMENT
);
191 is_divergent
= instr
->src
[0].ssa
->divergent
;
192 is_divergent
|= !(options
& nir_divergence_single_prim_per_subgroup
);
194 case nir_intrinsic_load_primitive_id
:
195 if (stage
== MESA_SHADER_FRAGMENT
)
196 is_divergent
= !(options
& nir_divergence_single_prim_per_subgroup
);
197 else if (stage
== MESA_SHADER_TESS_CTRL
)
198 is_divergent
= !(options
& nir_divergence_single_patch_per_tcs_subgroup
);
199 else if (stage
== MESA_SHADER_TESS_EVAL
)
200 is_divergent
= !(options
& nir_divergence_single_patch_per_tes_subgroup
);
201 else if (stage
== MESA_SHADER_GEOMETRY
)
204 unreachable("Invalid stage for load_primitive_id");
206 case nir_intrinsic_load_tess_level_inner
:
207 case nir_intrinsic_load_tess_level_outer
:
208 if (stage
== MESA_SHADER_TESS_CTRL
)
209 is_divergent
= !(options
& nir_divergence_single_patch_per_tcs_subgroup
);
210 else if (stage
== MESA_SHADER_TESS_EVAL
)
211 is_divergent
= !(options
& nir_divergence_single_patch_per_tes_subgroup
);
213 unreachable("Invalid stage for load_primitive_tess_level_*");
215 case nir_intrinsic_load_patch_vertices_in
:
216 if (stage
== MESA_SHADER_TESS_EVAL
)
217 is_divergent
= !(options
& nir_divergence_single_patch_per_tes_subgroup
);
219 assert(stage
== MESA_SHADER_TESS_CTRL
);
222 /* Clustered reductions are uniform if cluster_size == subgroup_size or
223 * the source is uniform and the operation is invariant.
224 * Inclusive scans are uniform if
225 * the source is uniform and the operation is invariant
227 case nir_intrinsic_reduce
:
228 if (nir_intrinsic_cluster_size(instr
) == 0)
231 case nir_intrinsic_inclusive_scan
: {
232 nir_op op
= nir_intrinsic_reduction_op(instr
);
233 is_divergent
= instr
->src
[0].ssa
->divergent
;
234 if (op
!= nir_op_umin
&& op
!= nir_op_imin
&& op
!= nir_op_fmin
&&
235 op
!= nir_op_umax
&& op
!= nir_op_imax
&& op
!= nir_op_fmax
&&
236 op
!= nir_op_iand
&& op
!= nir_op_ior
)
241 /* Intrinsics with divergence depending on sources */
242 case nir_intrinsic_ballot_bitfield_extract
:
243 case nir_intrinsic_ballot_find_lsb
:
244 case nir_intrinsic_ballot_find_msb
:
245 case nir_intrinsic_ballot_bit_count_reduce
:
246 case nir_intrinsic_shuffle_xor
:
247 case nir_intrinsic_shuffle_up
:
248 case nir_intrinsic_shuffle_down
:
249 case nir_intrinsic_quad_broadcast
:
250 case nir_intrinsic_quad_swap_horizontal
:
251 case nir_intrinsic_quad_swap_vertical
:
252 case nir_intrinsic_quad_swap_diagonal
:
253 case nir_intrinsic_load_deref
:
254 case nir_intrinsic_load_ubo
:
255 case nir_intrinsic_load_ssbo
:
256 case nir_intrinsic_load_shared
:
257 case nir_intrinsic_load_global
:
258 case nir_intrinsic_load_global_constant
:
259 case nir_intrinsic_load_uniform
:
260 case nir_intrinsic_load_push_constant
:
261 case nir_intrinsic_load_constant
:
262 case nir_intrinsic_load_sample_pos_from_id
:
263 case nir_intrinsic_load_kernel_input
:
264 case nir_intrinsic_image_load
:
265 case nir_intrinsic_image_deref_load
:
266 case nir_intrinsic_bindless_image_load
:
267 case nir_intrinsic_image_samples
:
268 case nir_intrinsic_image_deref_samples
:
269 case nir_intrinsic_bindless_image_samples
:
270 case nir_intrinsic_get_buffer_size
:
271 case nir_intrinsic_image_size
:
272 case nir_intrinsic_image_deref_size
:
273 case nir_intrinsic_bindless_image_size
:
274 case nir_intrinsic_copy_deref
:
275 case nir_intrinsic_deref_buffer_array_length
:
276 case nir_intrinsic_vulkan_resource_index
:
277 case nir_intrinsic_vulkan_resource_reindex
:
278 case nir_intrinsic_load_vulkan_descriptor
:
279 case nir_intrinsic_atomic_counter_read
:
280 case nir_intrinsic_atomic_counter_read_deref
:
281 case nir_intrinsic_quad_swizzle_amd
:
282 case nir_intrinsic_masked_swizzle_amd
: {
283 unsigned num_srcs
= nir_intrinsic_infos
[instr
->intrinsic
].num_srcs
;
284 for (unsigned i
= 0; i
< num_srcs
; i
++) {
285 if (instr
->src
[i
].ssa
->divergent
) {
293 case nir_intrinsic_shuffle
:
294 is_divergent
= instr
->src
[0].ssa
->divergent
&&
295 instr
->src
[1].ssa
->divergent
;
298 /* Intrinsics which are always divergent */
299 case nir_intrinsic_load_color0
:
300 case nir_intrinsic_load_color1
:
301 case nir_intrinsic_load_param
:
302 case nir_intrinsic_load_sample_id
:
303 case nir_intrinsic_load_sample_id_no_per_sample
:
304 case nir_intrinsic_load_sample_mask_in
:
305 case nir_intrinsic_load_interpolated_input
:
306 case nir_intrinsic_load_barycentric_pixel
:
307 case nir_intrinsic_load_barycentric_centroid
:
308 case nir_intrinsic_load_barycentric_sample
:
309 case nir_intrinsic_load_barycentric_model
:
310 case nir_intrinsic_load_barycentric_at_sample
:
311 case nir_intrinsic_load_barycentric_at_offset
:
312 case nir_intrinsic_interp_deref_at_offset
:
313 case nir_intrinsic_interp_deref_at_sample
:
314 case nir_intrinsic_interp_deref_at_centroid
:
315 case nir_intrinsic_interp_deref_at_vertex
:
316 case nir_intrinsic_load_tess_coord
:
317 case nir_intrinsic_load_point_coord
:
318 case nir_intrinsic_load_line_coord
:
319 case nir_intrinsic_load_frag_coord
:
320 case nir_intrinsic_load_sample_pos
:
321 case nir_intrinsic_load_vertex_id_zero_base
:
322 case nir_intrinsic_load_vertex_id
:
323 case nir_intrinsic_load_instance_id
:
324 case nir_intrinsic_load_invocation_id
:
325 case nir_intrinsic_load_local_invocation_id
:
326 case nir_intrinsic_load_local_invocation_index
:
327 case nir_intrinsic_load_global_invocation_id
:
328 case nir_intrinsic_load_global_invocation_index
:
329 case nir_intrinsic_load_subgroup_invocation
:
330 case nir_intrinsic_load_helper_invocation
:
331 case nir_intrinsic_is_helper_invocation
:
332 case nir_intrinsic_load_scratch
:
333 case nir_intrinsic_deref_atomic_add
:
334 case nir_intrinsic_deref_atomic_imin
:
335 case nir_intrinsic_deref_atomic_umin
:
336 case nir_intrinsic_deref_atomic_imax
:
337 case nir_intrinsic_deref_atomic_umax
:
338 case nir_intrinsic_deref_atomic_and
:
339 case nir_intrinsic_deref_atomic_or
:
340 case nir_intrinsic_deref_atomic_xor
:
341 case nir_intrinsic_deref_atomic_exchange
:
342 case nir_intrinsic_deref_atomic_comp_swap
:
343 case nir_intrinsic_deref_atomic_fadd
:
344 case nir_intrinsic_deref_atomic_fmin
:
345 case nir_intrinsic_deref_atomic_fmax
:
346 case nir_intrinsic_deref_atomic_fcomp_swap
:
347 case nir_intrinsic_ssbo_atomic_add
:
348 case nir_intrinsic_ssbo_atomic_imin
:
349 case nir_intrinsic_ssbo_atomic_umin
:
350 case nir_intrinsic_ssbo_atomic_imax
:
351 case nir_intrinsic_ssbo_atomic_umax
:
352 case nir_intrinsic_ssbo_atomic_and
:
353 case nir_intrinsic_ssbo_atomic_or
:
354 case nir_intrinsic_ssbo_atomic_xor
:
355 case nir_intrinsic_ssbo_atomic_exchange
:
356 case nir_intrinsic_ssbo_atomic_comp_swap
:
357 case nir_intrinsic_ssbo_atomic_fadd
:
358 case nir_intrinsic_ssbo_atomic_fmax
:
359 case nir_intrinsic_ssbo_atomic_fmin
:
360 case nir_intrinsic_ssbo_atomic_fcomp_swap
:
361 case nir_intrinsic_image_deref_atomic_add
:
362 case nir_intrinsic_image_deref_atomic_imin
:
363 case nir_intrinsic_image_deref_atomic_umin
:
364 case nir_intrinsic_image_deref_atomic_imax
:
365 case nir_intrinsic_image_deref_atomic_umax
:
366 case nir_intrinsic_image_deref_atomic_and
:
367 case nir_intrinsic_image_deref_atomic_or
:
368 case nir_intrinsic_image_deref_atomic_xor
:
369 case nir_intrinsic_image_deref_atomic_exchange
:
370 case nir_intrinsic_image_deref_atomic_comp_swap
:
371 case nir_intrinsic_image_deref_atomic_fadd
:
372 case nir_intrinsic_image_atomic_add
:
373 case nir_intrinsic_image_atomic_imin
:
374 case nir_intrinsic_image_atomic_umin
:
375 case nir_intrinsic_image_atomic_imax
:
376 case nir_intrinsic_image_atomic_umax
:
377 case nir_intrinsic_image_atomic_and
:
378 case nir_intrinsic_image_atomic_or
:
379 case nir_intrinsic_image_atomic_xor
:
380 case nir_intrinsic_image_atomic_exchange
:
381 case nir_intrinsic_image_atomic_comp_swap
:
382 case nir_intrinsic_image_atomic_fadd
:
383 case nir_intrinsic_bindless_image_atomic_add
:
384 case nir_intrinsic_bindless_image_atomic_imin
:
385 case nir_intrinsic_bindless_image_atomic_umin
:
386 case nir_intrinsic_bindless_image_atomic_imax
:
387 case nir_intrinsic_bindless_image_atomic_umax
:
388 case nir_intrinsic_bindless_image_atomic_and
:
389 case nir_intrinsic_bindless_image_atomic_or
:
390 case nir_intrinsic_bindless_image_atomic_xor
:
391 case nir_intrinsic_bindless_image_atomic_exchange
:
392 case nir_intrinsic_bindless_image_atomic_comp_swap
:
393 case nir_intrinsic_bindless_image_atomic_fadd
:
394 case nir_intrinsic_shared_atomic_add
:
395 case nir_intrinsic_shared_atomic_imin
:
396 case nir_intrinsic_shared_atomic_umin
:
397 case nir_intrinsic_shared_atomic_imax
:
398 case nir_intrinsic_shared_atomic_umax
:
399 case nir_intrinsic_shared_atomic_and
:
400 case nir_intrinsic_shared_atomic_or
:
401 case nir_intrinsic_shared_atomic_xor
:
402 case nir_intrinsic_shared_atomic_exchange
:
403 case nir_intrinsic_shared_atomic_comp_swap
:
404 case nir_intrinsic_shared_atomic_fadd
:
405 case nir_intrinsic_shared_atomic_fmin
:
406 case nir_intrinsic_shared_atomic_fmax
:
407 case nir_intrinsic_shared_atomic_fcomp_swap
:
408 case nir_intrinsic_global_atomic_add
:
409 case nir_intrinsic_global_atomic_imin
:
410 case nir_intrinsic_global_atomic_umin
:
411 case nir_intrinsic_global_atomic_imax
:
412 case nir_intrinsic_global_atomic_umax
:
413 case nir_intrinsic_global_atomic_and
:
414 case nir_intrinsic_global_atomic_or
:
415 case nir_intrinsic_global_atomic_xor
:
416 case nir_intrinsic_global_atomic_exchange
:
417 case nir_intrinsic_global_atomic_comp_swap
:
418 case nir_intrinsic_global_atomic_fadd
:
419 case nir_intrinsic_global_atomic_fmin
:
420 case nir_intrinsic_global_atomic_fmax
:
421 case nir_intrinsic_global_atomic_fcomp_swap
:
422 case nir_intrinsic_atomic_counter_add
:
423 case nir_intrinsic_atomic_counter_min
:
424 case nir_intrinsic_atomic_counter_max
:
425 case nir_intrinsic_atomic_counter_and
:
426 case nir_intrinsic_atomic_counter_or
:
427 case nir_intrinsic_atomic_counter_xor
:
428 case nir_intrinsic_atomic_counter_inc
:
429 case nir_intrinsic_atomic_counter_pre_dec
:
430 case nir_intrinsic_atomic_counter_post_dec
:
431 case nir_intrinsic_atomic_counter_exchange
:
432 case nir_intrinsic_atomic_counter_comp_swap
:
433 case nir_intrinsic_atomic_counter_add_deref
:
434 case nir_intrinsic_atomic_counter_min_deref
:
435 case nir_intrinsic_atomic_counter_max_deref
:
436 case nir_intrinsic_atomic_counter_and_deref
:
437 case nir_intrinsic_atomic_counter_or_deref
:
438 case nir_intrinsic_atomic_counter_xor_deref
:
439 case nir_intrinsic_atomic_counter_inc_deref
:
440 case nir_intrinsic_atomic_counter_pre_dec_deref
:
441 case nir_intrinsic_atomic_counter_post_dec_deref
:
442 case nir_intrinsic_atomic_counter_exchange_deref
:
443 case nir_intrinsic_atomic_counter_comp_swap_deref
:
444 case nir_intrinsic_exclusive_scan
:
445 case nir_intrinsic_ballot_bit_count_exclusive
:
446 case nir_intrinsic_ballot_bit_count_inclusive
:
447 case nir_intrinsic_write_invocation_amd
:
448 case nir_intrinsic_mbcnt_amd
:
449 case nir_intrinsic_elect
:
458 nir_print_instr(&instr
->instr
, stderr
);
459 unreachable("\nNIR divergence analysis: Unhandled intrinsic.");
463 instr
->dest
.ssa
.divergent
= is_divergent
;
468 visit_tex(nir_tex_instr
*instr
)
470 if (instr
->dest
.ssa
.divergent
)
473 bool is_divergent
= false;
475 for (unsigned i
= 0; i
< instr
->num_srcs
; i
++) {
476 switch (instr
->src
[i
].src_type
) {
477 case nir_tex_src_sampler_deref
:
478 case nir_tex_src_sampler_handle
:
479 case nir_tex_src_sampler_offset
:
480 is_divergent
|= instr
->src
[i
].src
.ssa
->divergent
&&
481 instr
->sampler_non_uniform
;
483 case nir_tex_src_texture_deref
:
484 case nir_tex_src_texture_handle
:
485 case nir_tex_src_texture_offset
:
486 is_divergent
|= instr
->src
[i
].src
.ssa
->divergent
&&
487 instr
->texture_non_uniform
;
490 is_divergent
|= instr
->src
[i
].src
.ssa
->divergent
;
495 instr
->dest
.ssa
.divergent
= is_divergent
;
500 visit_load_const(nir_load_const_instr
*instr
)
506 visit_ssa_undef(nir_ssa_undef_instr
*instr
)
512 nir_variable_mode_is_uniform(nir_variable_mode mode
) {
514 case nir_var_uniform
:
515 case nir_var_mem_ubo
:
516 case nir_var_mem_ssbo
:
517 case nir_var_mem_shared
:
518 case nir_var_mem_global
:
526 nir_variable_is_uniform(nir_variable
*var
, struct divergence_state
*state
)
528 if (nir_variable_mode_is_uniform(var
->data
.mode
))
531 if (state
->stage
== MESA_SHADER_FRAGMENT
&&
532 (state
->options
& nir_divergence_single_prim_per_subgroup
) &&
533 var
->data
.mode
== nir_var_shader_in
&&
534 var
->data
.interpolation
== INTERP_MODE_FLAT
)
537 if (state
->stage
== MESA_SHADER_TESS_CTRL
&&
538 (state
->options
& nir_divergence_single_patch_per_tcs_subgroup
) &&
539 var
->data
.mode
== nir_var_shader_out
&& var
->data
.patch
)
542 if (state
->stage
== MESA_SHADER_TESS_EVAL
&&
543 (state
->options
& nir_divergence_single_patch_per_tes_subgroup
) &&
544 var
->data
.mode
== nir_var_shader_in
&& var
->data
.patch
)
551 visit_deref(nir_deref_instr
*deref
, struct divergence_state
*state
)
553 if (deref
->dest
.ssa
.divergent
)
556 bool is_divergent
= false;
557 switch (deref
->deref_type
) {
558 case nir_deref_type_var
:
559 is_divergent
= !nir_variable_is_uniform(deref
->var
, state
);
561 case nir_deref_type_array
:
562 case nir_deref_type_ptr_as_array
:
563 is_divergent
= deref
->arr
.index
.ssa
->divergent
;
565 case nir_deref_type_struct
:
566 case nir_deref_type_array_wildcard
:
567 is_divergent
|= deref
->parent
.ssa
->divergent
;
569 case nir_deref_type_cast
:
570 is_divergent
= !nir_variable_mode_is_uniform(deref
->var
->data
.mode
) ||
571 deref
->parent
.ssa
->divergent
;
575 deref
->dest
.ssa
.divergent
= is_divergent
;
580 visit_jump(nir_jump_instr
*jump
, struct divergence_state
*state
)
582 switch (jump
->type
) {
583 case nir_jump_continue
:
584 if (state
->divergent_loop_continue
)
586 if (state
->divergent_loop_cf
)
587 state
->divergent_loop_continue
= true;
588 return state
->divergent_loop_continue
;
590 if (state
->divergent_loop_break
)
592 if (state
->divergent_loop_cf
)
593 state
->divergent_loop_break
= true;
594 return state
->divergent_loop_break
;
595 case nir_jump_return
:
596 unreachable("NIR divergence analysis: Unsupported return instruction.");
599 case nir_jump_goto_if
:
600 unreachable("NIR divergence analysis: Unsupported goto_if instruction.");
607 set_ssa_def_not_divergent(nir_ssa_def
*def
, UNUSED
void *_state
)
609 def
->divergent
= false;
614 visit_block(nir_block
*block
, struct divergence_state
*state
)
616 bool has_changed
= false;
618 nir_foreach_instr(instr
, block
) {
619 /* phis are handled when processing the branches */
620 if (instr
->type
== nir_instr_type_phi
)
623 if (state
->first_visit
)
624 nir_foreach_ssa_def(instr
, set_ssa_def_not_divergent
, NULL
);
626 switch (instr
->type
) {
627 case nir_instr_type_alu
:
628 has_changed
|= visit_alu(nir_instr_as_alu(instr
));
630 case nir_instr_type_intrinsic
:
631 has_changed
|= visit_intrinsic(nir_instr_as_intrinsic(instr
), state
);
633 case nir_instr_type_tex
:
634 has_changed
|= visit_tex(nir_instr_as_tex(instr
));
636 case nir_instr_type_load_const
:
637 has_changed
|= visit_load_const(nir_instr_as_load_const(instr
));
639 case nir_instr_type_ssa_undef
:
640 has_changed
|= visit_ssa_undef(nir_instr_as_ssa_undef(instr
));
642 case nir_instr_type_deref
:
643 has_changed
|= visit_deref(nir_instr_as_deref(instr
), state
);
645 case nir_instr_type_jump
:
646 has_changed
|= visit_jump(nir_instr_as_jump(instr
), state
);
648 case nir_instr_type_phi
:
649 case nir_instr_type_call
:
650 case nir_instr_type_parallel_copy
:
651 unreachable("NIR divergence analysis: Unsupported instruction type.");
658 /* There are 3 types of phi instructions:
659 * (1) gamma: represent the joining point of different paths
660 * created by an “if-then-else” branch.
661 * The resulting value is divergent if the branch condition
662 * or any of the source values is divergent. */
664 visit_if_merge_phi(nir_phi_instr
*phi
, bool if_cond_divergent
)
666 if (phi
->dest
.ssa
.divergent
)
669 unsigned defined_srcs
= 0;
670 nir_foreach_phi_src(src
, phi
) {
671 /* if any source value is divergent, the resulting value is divergent */
672 if (src
->src
.ssa
->divergent
) {
673 phi
->dest
.ssa
.divergent
= true;
676 if (src
->src
.ssa
->parent_instr
->type
!= nir_instr_type_ssa_undef
) {
681 /* if the condition is divergent and two sources defined, the definition is divergent */
682 if (defined_srcs
> 1 && if_cond_divergent
) {
683 phi
->dest
.ssa
.divergent
= true;
690 /* There are 3 types of phi instructions:
691 * (2) mu: which only exist at loop headers,
692 * merge initial and loop-carried values.
693 * The resulting value is divergent if any source value
694 * is divergent or a divergent loop continue condition
695 * is associated with a different ssa-def. */
697 visit_loop_header_phi(nir_phi_instr
*phi
, nir_block
*preheader
, bool divergent_continue
)
699 if (phi
->dest
.ssa
.divergent
)
702 nir_ssa_def
* same
= NULL
;
703 nir_foreach_phi_src(src
, phi
) {
704 /* if any source value is divergent, the resulting value is divergent */
705 if (src
->src
.ssa
->divergent
) {
706 phi
->dest
.ssa
.divergent
= true;
709 /* if this loop is uniform, we're done here */
710 if (!divergent_continue
)
712 /* skip the loop preheader */
713 if (src
->pred
== preheader
)
715 /* skip undef values */
716 if (src
->src
.ssa
->parent_instr
->type
== nir_instr_type_ssa_undef
)
719 /* check if all loop-carried values are from the same ssa-def */
722 else if (same
!= src
->src
.ssa
) {
723 phi
->dest
.ssa
.divergent
= true;
731 /* There are 3 types of phi instructions:
732 * (3) eta: represent values that leave a loop.
733 * The resulting value is divergent if the source value is divergent
734 * or any loop exit condition is divergent for a value which is
735 * not loop-invariant.
736 * (note: there should be no phi for loop-invariant variables.) */
738 visit_loop_exit_phi(nir_phi_instr
*phi
, bool divergent_break
)
740 if (phi
->dest
.ssa
.divergent
)
743 if (divergent_break
) {
744 phi
->dest
.ssa
.divergent
= true;
748 /* if any source value is divergent, the resulting value is divergent */
749 nir_foreach_phi_src(src
, phi
) {
750 if (src
->src
.ssa
->divergent
) {
751 phi
->dest
.ssa
.divergent
= true;
760 visit_if(nir_if
*if_stmt
, struct divergence_state
*state
)
762 bool progress
= false;
764 struct divergence_state then_state
= *state
;
765 then_state
.divergent_loop_cf
|= if_stmt
->condition
.ssa
->divergent
;
766 progress
|= visit_cf_list(&if_stmt
->then_list
, &then_state
);
768 struct divergence_state else_state
= *state
;
769 else_state
.divergent_loop_cf
|= if_stmt
->condition
.ssa
->divergent
;
770 progress
|= visit_cf_list(&if_stmt
->else_list
, &else_state
);
772 /* handle phis after the IF */
773 nir_foreach_instr(instr
, nir_cf_node_cf_tree_next(&if_stmt
->cf_node
)) {
774 if (instr
->type
!= nir_instr_type_phi
)
777 if (state
->first_visit
)
778 nir_instr_as_phi(instr
)->dest
.ssa
.divergent
= false;
779 progress
|= visit_if_merge_phi(nir_instr_as_phi(instr
),
780 if_stmt
->condition
.ssa
->divergent
);
783 /* join loop divergence information from both branch legs */
784 state
->divergent_loop_continue
|= then_state
.divergent_loop_continue
||
785 else_state
.divergent_loop_continue
;
786 state
->divergent_loop_break
|= then_state
.divergent_loop_break
||
787 else_state
.divergent_loop_break
;
789 /* A divergent continue makes succeeding loop CF divergent:
790 * not all loop-active invocations participate in the remaining loop-body
791 * which means that a following break might be taken by some invocations, only */
792 state
->divergent_loop_cf
|= state
->divergent_loop_continue
;
798 visit_loop(nir_loop
*loop
, struct divergence_state
*state
)
800 bool progress
= false;
801 nir_block
*loop_header
= nir_loop_first_block(loop
);
802 nir_block
*loop_preheader
= nir_block_cf_tree_prev(loop_header
);
804 /* handle loop header phis first: we have no knowledge yet about
805 * the loop's control flow or any loop-carried sources. */
806 nir_foreach_instr(instr
, loop_header
) {
807 if (instr
->type
!= nir_instr_type_phi
)
810 nir_phi_instr
*phi
= nir_instr_as_phi(instr
);
811 if (!state
->first_visit
&& phi
->dest
.ssa
.divergent
)
814 nir_foreach_phi_src(src
, phi
) {
815 if (src
->pred
== loop_preheader
) {
816 phi
->dest
.ssa
.divergent
= src
->src
.ssa
->divergent
;
820 progress
|= phi
->dest
.ssa
.divergent
;
823 /* setup loop state */
824 struct divergence_state loop_state
= *state
;
825 loop_state
.divergent_loop_cf
= false;
826 loop_state
.divergent_loop_continue
= false;
827 loop_state
.divergent_loop_break
= false;
829 /* process loop body until no further changes are made */
832 progress
|= visit_cf_list(&loop
->body
, &loop_state
);
835 /* revisit loop header phis to see if something has changed */
836 nir_foreach_instr(instr
, loop_header
) {
837 if (instr
->type
!= nir_instr_type_phi
)
840 repeat
|= visit_loop_header_phi(nir_instr_as_phi(instr
),
842 loop_state
.divergent_loop_continue
);
845 loop_state
.divergent_loop_cf
= false;
846 loop_state
.first_visit
= false;
849 /* handle phis after the loop */
850 nir_foreach_instr(instr
, nir_cf_node_cf_tree_next(&loop
->cf_node
)) {
851 if (instr
->type
!= nir_instr_type_phi
)
854 if (state
->first_visit
)
855 nir_instr_as_phi(instr
)->dest
.ssa
.divergent
= false;
856 progress
|= visit_loop_exit_phi(nir_instr_as_phi(instr
),
857 loop_state
.divergent_loop_break
);
864 visit_cf_list(struct exec_list
*list
, struct divergence_state
*state
)
866 bool has_changed
= false;
868 foreach_list_typed(nir_cf_node
, node
, node
, list
) {
869 switch (node
->type
) {
870 case nir_cf_node_block
:
871 has_changed
|= visit_block(nir_cf_node_as_block(node
), state
);
874 has_changed
|= visit_if(nir_cf_node_as_if(node
), state
);
876 case nir_cf_node_loop
:
877 has_changed
|= visit_loop(nir_cf_node_as_loop(node
), state
);
879 case nir_cf_node_function
:
880 unreachable("NIR divergence analysis: Unsupported cf_node type.");
888 nir_divergence_analysis(nir_shader
*shader
, nir_divergence_options options
)
890 struct divergence_state state
= {
892 .stage
= shader
->info
.stage
,
893 .divergent_loop_cf
= false,
894 .divergent_loop_continue
= false,
895 .divergent_loop_break
= false,
899 visit_cf_list(&nir_shader_get_entrypoint(shader
)->body
, &state
);