/*
 * Copyright © 2018 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
/* This pass computes for each ssa definition if it is uniform.
 * That is, the variable has the same value for all invocations
 * of the group.
 *
 * This divergence analysis pass expects the shader to be in LCSSA-form.
 *
 * This algorithm implements "The Simple Divergence Analysis" from
 * Diogo Sampaio, Rafael De Souza, Sylvain Collange, Fernando Magno Quintão Pereira.
 * Divergence Analysis. ACM Transactions on Programming Languages and Systems (TOPLAS),
 * ACM, 2013, 35 (4), pp.13:1-13:36. <10.1145/2523815>. <hal-00909072v2>
 */
40 visit_cf_list(bool *divergent
, struct exec_list
*list
,
41 nir_divergence_options options
, gl_shader_stage stage
);
44 visit_alu(bool *divergent
, nir_alu_instr
*instr
)
46 if (divergent
[instr
->dest
.dest
.ssa
.index
])
49 unsigned num_src
= nir_op_infos
[instr
->op
].num_inputs
;
51 for (unsigned i
= 0; i
< num_src
; i
++) {
52 if (divergent
[instr
->src
[i
].src
.ssa
->index
]) {
53 divergent
[instr
->dest
.dest
.ssa
.index
] = true;
62 visit_intrinsic(bool *divergent
, nir_intrinsic_instr
*instr
,
63 nir_divergence_options options
, gl_shader_stage stage
)
65 if (!nir_intrinsic_infos
[instr
->intrinsic
].has_dest
)
68 if (divergent
[instr
->dest
.ssa
.index
])
71 bool is_divergent
= false;
72 switch (instr
->intrinsic
) {
73 /* Intrinsics which are always uniform */
74 case nir_intrinsic_shader_clock
:
75 case nir_intrinsic_ballot
:
76 case nir_intrinsic_read_invocation
:
77 case nir_intrinsic_read_first_invocation
:
78 case nir_intrinsic_vote_any
:
79 case nir_intrinsic_vote_all
:
80 case nir_intrinsic_vote_feq
:
81 case nir_intrinsic_vote_ieq
:
82 case nir_intrinsic_load_work_dim
:
83 case nir_intrinsic_load_work_group_id
:
84 case nir_intrinsic_load_num_work_groups
:
85 case nir_intrinsic_load_local_group_size
:
86 case nir_intrinsic_load_subgroup_id
:
87 case nir_intrinsic_load_num_subgroups
:
88 case nir_intrinsic_load_subgroup_size
:
89 case nir_intrinsic_load_subgroup_eq_mask
:
90 case nir_intrinsic_load_subgroup_ge_mask
:
91 case nir_intrinsic_load_subgroup_gt_mask
:
92 case nir_intrinsic_load_subgroup_le_mask
:
93 case nir_intrinsic_load_subgroup_lt_mask
:
94 case nir_intrinsic_first_invocation
:
95 case nir_intrinsic_load_base_instance
:
96 case nir_intrinsic_load_base_vertex
:
97 case nir_intrinsic_load_first_vertex
:
98 case nir_intrinsic_load_draw_id
:
99 case nir_intrinsic_load_is_indexed_draw
:
100 case nir_intrinsic_load_viewport_scale
:
101 case nir_intrinsic_load_alpha_ref_float
:
102 case nir_intrinsic_load_user_clip_plane
:
103 case nir_intrinsic_load_viewport_x_scale
:
104 case nir_intrinsic_load_viewport_y_scale
:
105 case nir_intrinsic_load_viewport_z_scale
:
106 case nir_intrinsic_load_viewport_offset
:
107 case nir_intrinsic_load_viewport_z_offset
:
108 case nir_intrinsic_load_blend_const_color_a_float
:
109 case nir_intrinsic_load_blend_const_color_b_float
:
110 case nir_intrinsic_load_blend_const_color_g_float
:
111 case nir_intrinsic_load_blend_const_color_r_float
:
112 case nir_intrinsic_load_blend_const_color_rgba
:
113 case nir_intrinsic_load_blend_const_color_aaaa8888_unorm
:
114 case nir_intrinsic_load_blend_const_color_rgba8888_unorm
:
115 is_divergent
= false;
118 /* Intrinsics with divergence depending on shader stage and hardware */
119 case nir_intrinsic_load_input
:
120 is_divergent
= divergent
[instr
->src
[0].ssa
->index
];
121 if (stage
== MESA_SHADER_FRAGMENT
)
122 is_divergent
|= !(options
& nir_divergence_single_prim_per_subgroup
);
123 else if (stage
== MESA_SHADER_TESS_EVAL
)
124 is_divergent
|= !(options
& nir_divergence_single_patch_per_tes_subgroup
);
128 case nir_intrinsic_load_input_vertex
:
129 is_divergent
= divergent
[instr
->src
[1].ssa
->index
];
130 assert(stage
== MESA_SHADER_FRAGMENT
);
131 is_divergent
|= !(options
& nir_divergence_single_prim_per_subgroup
);
133 case nir_intrinsic_load_output
:
134 assert(stage
== MESA_SHADER_TESS_CTRL
|| stage
== MESA_SHADER_FRAGMENT
);
135 is_divergent
= divergent
[instr
->src
[0].ssa
->index
];
136 if (stage
== MESA_SHADER_TESS_CTRL
)
137 is_divergent
|= !(options
& nir_divergence_single_patch_per_tcs_subgroup
);
141 case nir_intrinsic_load_layer_id
:
142 case nir_intrinsic_load_front_face
:
143 assert(stage
== MESA_SHADER_FRAGMENT
);
144 is_divergent
= !(options
& nir_divergence_single_prim_per_subgroup
);
146 case nir_intrinsic_load_view_index
:
147 assert(stage
!= MESA_SHADER_COMPUTE
&& stage
!= MESA_SHADER_KERNEL
);
148 if (options
& nir_divergence_view_index_uniform
)
149 is_divergent
= false;
150 else if (stage
== MESA_SHADER_FRAGMENT
)
151 is_divergent
= !(options
& nir_divergence_single_prim_per_subgroup
);
153 case nir_intrinsic_load_fs_input_interp_deltas
:
154 assert(stage
== MESA_SHADER_FRAGMENT
);
155 is_divergent
= divergent
[instr
->src
[0].ssa
->index
];
156 is_divergent
|= !(options
& nir_divergence_single_prim_per_subgroup
);
158 case nir_intrinsic_load_primitive_id
:
159 if (stage
== MESA_SHADER_FRAGMENT
)
160 is_divergent
= !(options
& nir_divergence_single_prim_per_subgroup
);
161 else if (stage
== MESA_SHADER_TESS_CTRL
)
162 is_divergent
= !(options
& nir_divergence_single_patch_per_tcs_subgroup
);
163 else if (stage
== MESA_SHADER_TESS_EVAL
)
164 is_divergent
= !(options
& nir_divergence_single_patch_per_tes_subgroup
);
165 else if (stage
== MESA_SHADER_GEOMETRY
)
168 unreachable("Invalid stage for load_primitive_id");
170 case nir_intrinsic_load_tess_level_inner
:
171 case nir_intrinsic_load_tess_level_outer
:
172 if (stage
== MESA_SHADER_TESS_CTRL
)
173 is_divergent
= !(options
& nir_divergence_single_patch_per_tcs_subgroup
);
174 else if (stage
== MESA_SHADER_TESS_EVAL
)
175 is_divergent
= !(options
& nir_divergence_single_patch_per_tes_subgroup
);
177 unreachable("Invalid stage for load_primitive_tess_level_*");
179 case nir_intrinsic_load_patch_vertices_in
:
180 if (stage
== MESA_SHADER_TESS_EVAL
)
181 is_divergent
= !(options
& nir_divergence_single_patch_per_tes_subgroup
);
183 assert(stage
== MESA_SHADER_TESS_CTRL
);
186 /* Clustered reductions are uniform if cluster_size == subgroup_size or
187 * the source is uniform and the operation is invariant.
188 * Inclusive scans are uniform if
189 * the source is uniform and the operation is invariant
191 case nir_intrinsic_reduce
:
192 if (nir_intrinsic_cluster_size(instr
) == 0)
195 case nir_intrinsic_inclusive_scan
: {
196 nir_op op
= nir_intrinsic_reduction_op(instr
);
197 is_divergent
= divergent
[instr
->src
[0].ssa
->index
];
198 if (op
!= nir_op_umin
&& op
!= nir_op_imin
&& op
!= nir_op_fmin
&&
199 op
!= nir_op_umax
&& op
!= nir_op_imax
&& op
!= nir_op_fmax
&&
200 op
!= nir_op_iand
&& op
!= nir_op_ior
)
205 /* Intrinsics with divergence depending on sources */
206 case nir_intrinsic_ballot_bitfield_extract
:
207 case nir_intrinsic_ballot_find_lsb
:
208 case nir_intrinsic_ballot_find_msb
:
209 case nir_intrinsic_ballot_bit_count_reduce
:
210 case nir_intrinsic_shuffle_xor
:
211 case nir_intrinsic_shuffle_up
:
212 case nir_intrinsic_shuffle_down
:
213 case nir_intrinsic_quad_broadcast
:
214 case nir_intrinsic_quad_swap_horizontal
:
215 case nir_intrinsic_quad_swap_vertical
:
216 case nir_intrinsic_quad_swap_diagonal
:
217 case nir_intrinsic_load_deref
:
218 case nir_intrinsic_load_ubo
:
219 case nir_intrinsic_load_ssbo
:
220 case nir_intrinsic_load_shared
:
221 case nir_intrinsic_load_global
:
222 case nir_intrinsic_load_uniform
:
223 case nir_intrinsic_load_push_constant
:
224 case nir_intrinsic_load_constant
:
225 case nir_intrinsic_load_sample_pos_from_id
:
226 case nir_intrinsic_load_kernel_input
:
227 case nir_intrinsic_image_load
:
228 case nir_intrinsic_image_deref_load
:
229 case nir_intrinsic_bindless_image_load
:
230 case nir_intrinsic_image_samples
:
231 case nir_intrinsic_image_deref_samples
:
232 case nir_intrinsic_bindless_image_samples
:
233 case nir_intrinsic_get_buffer_size
:
234 case nir_intrinsic_image_size
:
235 case nir_intrinsic_image_deref_size
:
236 case nir_intrinsic_bindless_image_size
:
237 case nir_intrinsic_copy_deref
:
238 case nir_intrinsic_deref_buffer_array_length
:
239 case nir_intrinsic_vulkan_resource_index
:
240 case nir_intrinsic_vulkan_resource_reindex
:
241 case nir_intrinsic_load_vulkan_descriptor
:
242 case nir_intrinsic_atomic_counter_read
:
243 case nir_intrinsic_atomic_counter_read_deref
:
244 case nir_intrinsic_quad_swizzle_amd
:
245 case nir_intrinsic_masked_swizzle_amd
: {
246 unsigned num_srcs
= nir_intrinsic_infos
[instr
->intrinsic
].num_srcs
;
247 for (unsigned i
= 0; i
< num_srcs
; i
++) {
248 if (divergent
[instr
->src
[i
].ssa
->index
]) {
256 case nir_intrinsic_shuffle
:
257 is_divergent
= divergent
[instr
->src
[0].ssa
->index
] &&
258 divergent
[instr
->src
[1].ssa
->index
];
261 /* Intrinsics which are always divergent */
262 case nir_intrinsic_load_color0
:
263 case nir_intrinsic_load_color1
:
264 case nir_intrinsic_load_param
:
265 case nir_intrinsic_load_sample_id
:
266 case nir_intrinsic_load_sample_id_no_per_sample
:
267 case nir_intrinsic_load_sample_mask_in
:
268 case nir_intrinsic_load_interpolated_input
:
269 case nir_intrinsic_load_barycentric_pixel
:
270 case nir_intrinsic_load_barycentric_centroid
:
271 case nir_intrinsic_load_barycentric_sample
:
272 case nir_intrinsic_load_barycentric_model
:
273 case nir_intrinsic_load_barycentric_at_sample
:
274 case nir_intrinsic_load_barycentric_at_offset
:
275 case nir_intrinsic_interp_deref_at_offset
:
276 case nir_intrinsic_interp_deref_at_sample
:
277 case nir_intrinsic_interp_deref_at_centroid
:
278 case nir_intrinsic_interp_deref_at_vertex
:
279 case nir_intrinsic_load_tess_coord
:
280 case nir_intrinsic_load_point_coord
:
281 case nir_intrinsic_load_frag_coord
:
282 case nir_intrinsic_load_sample_pos
:
283 case nir_intrinsic_load_vertex_id_zero_base
:
284 case nir_intrinsic_load_vertex_id
:
285 case nir_intrinsic_load_per_vertex_input
:
286 case nir_intrinsic_load_per_vertex_output
:
287 case nir_intrinsic_load_instance_id
:
288 case nir_intrinsic_load_invocation_id
:
289 case nir_intrinsic_load_local_invocation_id
:
290 case nir_intrinsic_load_local_invocation_index
:
291 case nir_intrinsic_load_global_invocation_id
:
292 case nir_intrinsic_load_global_invocation_index
:
293 case nir_intrinsic_load_subgroup_invocation
:
294 case nir_intrinsic_load_helper_invocation
:
295 case nir_intrinsic_is_helper_invocation
:
296 case nir_intrinsic_load_scratch
:
297 case nir_intrinsic_deref_atomic_add
:
298 case nir_intrinsic_deref_atomic_imin
:
299 case nir_intrinsic_deref_atomic_umin
:
300 case nir_intrinsic_deref_atomic_imax
:
301 case nir_intrinsic_deref_atomic_umax
:
302 case nir_intrinsic_deref_atomic_and
:
303 case nir_intrinsic_deref_atomic_or
:
304 case nir_intrinsic_deref_atomic_xor
:
305 case nir_intrinsic_deref_atomic_exchange
:
306 case nir_intrinsic_deref_atomic_comp_swap
:
307 case nir_intrinsic_deref_atomic_fadd
:
308 case nir_intrinsic_deref_atomic_fmin
:
309 case nir_intrinsic_deref_atomic_fmax
:
310 case nir_intrinsic_deref_atomic_fcomp_swap
:
311 case nir_intrinsic_ssbo_atomic_add
:
312 case nir_intrinsic_ssbo_atomic_imin
:
313 case nir_intrinsic_ssbo_atomic_umin
:
314 case nir_intrinsic_ssbo_atomic_imax
:
315 case nir_intrinsic_ssbo_atomic_umax
:
316 case nir_intrinsic_ssbo_atomic_and
:
317 case nir_intrinsic_ssbo_atomic_or
:
318 case nir_intrinsic_ssbo_atomic_xor
:
319 case nir_intrinsic_ssbo_atomic_exchange
:
320 case nir_intrinsic_ssbo_atomic_comp_swap
:
321 case nir_intrinsic_ssbo_atomic_fadd
:
322 case nir_intrinsic_ssbo_atomic_fmax
:
323 case nir_intrinsic_ssbo_atomic_fmin
:
324 case nir_intrinsic_ssbo_atomic_fcomp_swap
:
325 case nir_intrinsic_image_deref_atomic_add
:
326 case nir_intrinsic_image_deref_atomic_imin
:
327 case nir_intrinsic_image_deref_atomic_umin
:
328 case nir_intrinsic_image_deref_atomic_imax
:
329 case nir_intrinsic_image_deref_atomic_umax
:
330 case nir_intrinsic_image_deref_atomic_and
:
331 case nir_intrinsic_image_deref_atomic_or
:
332 case nir_intrinsic_image_deref_atomic_xor
:
333 case nir_intrinsic_image_deref_atomic_exchange
:
334 case nir_intrinsic_image_deref_atomic_comp_swap
:
335 case nir_intrinsic_image_deref_atomic_fadd
:
336 case nir_intrinsic_image_atomic_add
:
337 case nir_intrinsic_image_atomic_imin
:
338 case nir_intrinsic_image_atomic_umin
:
339 case nir_intrinsic_image_atomic_imax
:
340 case nir_intrinsic_image_atomic_umax
:
341 case nir_intrinsic_image_atomic_and
:
342 case nir_intrinsic_image_atomic_or
:
343 case nir_intrinsic_image_atomic_xor
:
344 case nir_intrinsic_image_atomic_exchange
:
345 case nir_intrinsic_image_atomic_comp_swap
:
346 case nir_intrinsic_image_atomic_fadd
:
347 case nir_intrinsic_bindless_image_atomic_add
:
348 case nir_intrinsic_bindless_image_atomic_imin
:
349 case nir_intrinsic_bindless_image_atomic_umin
:
350 case nir_intrinsic_bindless_image_atomic_imax
:
351 case nir_intrinsic_bindless_image_atomic_umax
:
352 case nir_intrinsic_bindless_image_atomic_and
:
353 case nir_intrinsic_bindless_image_atomic_or
:
354 case nir_intrinsic_bindless_image_atomic_xor
:
355 case nir_intrinsic_bindless_image_atomic_exchange
:
356 case nir_intrinsic_bindless_image_atomic_comp_swap
:
357 case nir_intrinsic_bindless_image_atomic_fadd
:
358 case nir_intrinsic_shared_atomic_add
:
359 case nir_intrinsic_shared_atomic_imin
:
360 case nir_intrinsic_shared_atomic_umin
:
361 case nir_intrinsic_shared_atomic_imax
:
362 case nir_intrinsic_shared_atomic_umax
:
363 case nir_intrinsic_shared_atomic_and
:
364 case nir_intrinsic_shared_atomic_or
:
365 case nir_intrinsic_shared_atomic_xor
:
366 case nir_intrinsic_shared_atomic_exchange
:
367 case nir_intrinsic_shared_atomic_comp_swap
:
368 case nir_intrinsic_shared_atomic_fadd
:
369 case nir_intrinsic_shared_atomic_fmin
:
370 case nir_intrinsic_shared_atomic_fmax
:
371 case nir_intrinsic_shared_atomic_fcomp_swap
:
372 case nir_intrinsic_global_atomic_add
:
373 case nir_intrinsic_global_atomic_imin
:
374 case nir_intrinsic_global_atomic_umin
:
375 case nir_intrinsic_global_atomic_imax
:
376 case nir_intrinsic_global_atomic_umax
:
377 case nir_intrinsic_global_atomic_and
:
378 case nir_intrinsic_global_atomic_or
:
379 case nir_intrinsic_global_atomic_xor
:
380 case nir_intrinsic_global_atomic_exchange
:
381 case nir_intrinsic_global_atomic_comp_swap
:
382 case nir_intrinsic_global_atomic_fadd
:
383 case nir_intrinsic_global_atomic_fmin
:
384 case nir_intrinsic_global_atomic_fmax
:
385 case nir_intrinsic_global_atomic_fcomp_swap
:
386 case nir_intrinsic_atomic_counter_add
:
387 case nir_intrinsic_atomic_counter_min
:
388 case nir_intrinsic_atomic_counter_max
:
389 case nir_intrinsic_atomic_counter_and
:
390 case nir_intrinsic_atomic_counter_or
:
391 case nir_intrinsic_atomic_counter_xor
:
392 case nir_intrinsic_atomic_counter_inc
:
393 case nir_intrinsic_atomic_counter_pre_dec
:
394 case nir_intrinsic_atomic_counter_post_dec
:
395 case nir_intrinsic_atomic_counter_exchange
:
396 case nir_intrinsic_atomic_counter_comp_swap
:
397 case nir_intrinsic_atomic_counter_add_deref
:
398 case nir_intrinsic_atomic_counter_min_deref
:
399 case nir_intrinsic_atomic_counter_max_deref
:
400 case nir_intrinsic_atomic_counter_and_deref
:
401 case nir_intrinsic_atomic_counter_or_deref
:
402 case nir_intrinsic_atomic_counter_xor_deref
:
403 case nir_intrinsic_atomic_counter_inc_deref
:
404 case nir_intrinsic_atomic_counter_pre_dec_deref
:
405 case nir_intrinsic_atomic_counter_post_dec_deref
:
406 case nir_intrinsic_atomic_counter_exchange_deref
:
407 case nir_intrinsic_atomic_counter_comp_swap_deref
:
408 case nir_intrinsic_exclusive_scan
:
409 case nir_intrinsic_ballot_bit_count_exclusive
:
410 case nir_intrinsic_ballot_bit_count_inclusive
:
411 case nir_intrinsic_write_invocation_amd
:
412 case nir_intrinsic_mbcnt_amd
:
421 nir_print_instr(&instr
->instr
, stderr
);
422 unreachable("\nNIR divergence analysis: Unhandled intrinsic.");
426 divergent
[instr
->dest
.ssa
.index
] = is_divergent
;
431 visit_tex(bool *divergent
, nir_tex_instr
*instr
)
433 if (divergent
[instr
->dest
.ssa
.index
])
436 bool is_divergent
= false;
438 for (unsigned i
= 0; i
< instr
->num_srcs
; i
++) {
439 switch (instr
->src
[i
].src_type
) {
440 case nir_tex_src_sampler_deref
:
441 case nir_tex_src_sampler_handle
:
442 case nir_tex_src_sampler_offset
:
443 is_divergent
|= divergent
[instr
->src
[i
].src
.ssa
->index
] &&
444 instr
->sampler_non_uniform
;
446 case nir_tex_src_texture_deref
:
447 case nir_tex_src_texture_handle
:
448 case nir_tex_src_texture_offset
:
449 is_divergent
|= divergent
[instr
->src
[i
].src
.ssa
->index
] &&
450 instr
->texture_non_uniform
;
453 is_divergent
|= divergent
[instr
->src
[i
].src
.ssa
->index
];
458 divergent
[instr
->dest
.ssa
.index
] = is_divergent
;
463 visit_phi(bool *divergent
, nir_phi_instr
*instr
)
465 /* There are 3 types of phi instructions:
466 * (1) gamma: represent the joining point of different paths
467 * created by an “if-then-else” branch.
468 * The resulting value is divergent if the branch condition
469 * or any of the source values is divergent.
471 * (2) mu: which only exist at loop headers,
472 * merge initial and loop-carried values.
473 * The resulting value is divergent if any source value
474 * is divergent or a divergent loop continue condition
475 * is associated with a different ssa-def.
477 * (3) eta: represent values that leave a loop.
478 * The resulting value is divergent if the source value is divergent
479 * or any loop exit condition is divergent for a value which is
480 * not loop-invariant.
481 * (note: there should be no phi for loop-invariant variables.)
484 if (divergent
[instr
->dest
.ssa
.index
])
487 nir_foreach_phi_src(src
, instr
) {
488 /* if any source value is divergent, the resulting value is divergent */
489 if (divergent
[src
->src
.ssa
->index
]) {
490 divergent
[instr
->dest
.ssa
.index
] = true;
495 nir_cf_node
*prev
= nir_cf_node_prev(&instr
->instr
.block
->cf_node
);
498 /* mu: if no predecessor node exists, the phi must be at a loop header */
499 nir_loop
*loop
= nir_cf_node_as_loop(instr
->instr
.block
->cf_node
.parent
);
500 prev
= nir_cf_node_prev(&loop
->cf_node
);
501 nir_ssa_def
* same
= NULL
;
502 bool all_same
= true;
504 /* first, check if all loop-carried values are from the same ssa-def */
505 nir_foreach_phi_src(src
, instr
) {
506 if (src
->pred
== nir_cf_node_as_block(prev
))
508 if (src
->src
.ssa
->parent_instr
->type
== nir_instr_type_ssa_undef
)
512 else if (same
!= src
->src
.ssa
)
516 /* if all loop-carried values are the same, the resulting value is uniform */
520 /* check if the loop-carried values come from different ssa-defs
521 * and the corresponding condition is divergent. */
522 nir_foreach_phi_src(src
, instr
) {
523 /* skip the loop preheader */
524 if (src
->pred
== nir_cf_node_as_block(prev
))
527 /* skip the unconditional back-edge */
528 if (src
->pred
== nir_loop_last_block(loop
))
531 /* if the value is undef, we don't need to check the condition */
532 if (src
->src
.ssa
->parent_instr
->type
== nir_instr_type_ssa_undef
)
535 nir_cf_node
*current
= src
->pred
->cf_node
.parent
;
536 /* check recursively the conditions if any is divergent */
537 while (current
->type
!= nir_cf_node_loop
) {
538 assert (current
->type
== nir_cf_node_if
);
539 nir_if
*if_node
= nir_cf_node_as_if(current
);
540 if (divergent
[if_node
->condition
.ssa
->index
]) {
541 divergent
[instr
->dest
.ssa
.index
] = true;
544 current
= current
->parent
;
546 assert(current
== &loop
->cf_node
);
549 } else if (prev
->type
== nir_cf_node_if
) {
550 /* if only one of the incoming values is defined, the resulting value is uniform */
551 unsigned defined_srcs
= 0;
552 nir_foreach_phi_src(src
, instr
) {
553 if (src
->src
.ssa
->parent_instr
->type
!= nir_instr_type_ssa_undef
)
556 if (defined_srcs
<= 1)
559 /* gamma: check if the condition is divergent */
560 nir_if
*if_node
= nir_cf_node_as_if(prev
);
561 if (divergent
[if_node
->condition
.ssa
->index
]) {
562 divergent
[instr
->dest
.ssa
.index
] = true;
567 /* eta: the predecessor must be a loop */
568 assert(prev
->type
== nir_cf_node_loop
);
570 /* Check if any loop exit condition is divergent:
571 * That is any break happens under divergent condition or
572 * a break is preceeded by a divergent continue
574 nir_foreach_phi_src(src
, instr
) {
575 nir_cf_node
*current
= src
->pred
->cf_node
.parent
;
577 /* check recursively the conditions if any is divergent */
578 while (current
->type
!= nir_cf_node_loop
) {
579 assert(current
->type
== nir_cf_node_if
);
580 nir_if
*if_node
= nir_cf_node_as_if(current
);
581 if (divergent
[if_node
->condition
.ssa
->index
]) {
582 divergent
[instr
->dest
.ssa
.index
] = true;
585 current
= current
->parent
;
587 assert(current
== prev
);
589 /* check if any divergent continue happened before the break */
590 nir_foreach_block_in_cf_node(block
, prev
) {
591 if (block
== src
->pred
)
593 if (!nir_block_ends_in_jump(block
))
596 nir_jump_instr
*jump
= nir_instr_as_jump(nir_block_last_instr(block
));
597 if (jump
->type
!= nir_jump_continue
)
600 current
= block
->cf_node
.parent
;
601 bool is_divergent
= false;
602 while (current
!= prev
) {
603 /* the continue belongs to an inner loop */
604 if (current
->type
== nir_cf_node_loop
) {
605 is_divergent
= false;
608 assert(current
->type
== nir_cf_node_if
);
609 nir_if
*if_node
= nir_cf_node_as_if(current
);
610 is_divergent
|= divergent
[if_node
->condition
.ssa
->index
];
611 current
= current
->parent
;
615 divergent
[instr
->dest
.ssa
.index
] = true;
626 visit_load_const(bool *divergent
, nir_load_const_instr
*instr
)
632 visit_ssa_undef(bool *divergent
, nir_ssa_undef_instr
*instr
)
638 nir_variable_mode_is_uniform(nir_variable_mode mode
) {
640 case nir_var_uniform
:
641 case nir_var_mem_ubo
:
642 case nir_var_mem_ssbo
:
643 case nir_var_mem_shared
:
644 case nir_var_mem_global
:
652 nir_variable_is_uniform(nir_variable
*var
, nir_divergence_options options
,
653 gl_shader_stage stage
)
655 if (nir_variable_mode_is_uniform(var
->data
.mode
))
658 if (stage
== MESA_SHADER_FRAGMENT
&&
659 (options
& nir_divergence_single_prim_per_subgroup
) &&
660 var
->data
.mode
== nir_var_shader_in
&&
661 var
->data
.interpolation
== INTERP_MODE_FLAT
)
664 if (stage
== MESA_SHADER_TESS_CTRL
&&
665 (options
& nir_divergence_single_patch_per_tcs_subgroup
) &&
666 var
->data
.mode
== nir_var_shader_out
&& var
->data
.patch
)
669 if (stage
== MESA_SHADER_TESS_EVAL
&&
670 (options
& nir_divergence_single_patch_per_tes_subgroup
) &&
671 var
->data
.mode
== nir_var_shader_in
&& var
->data
.patch
)
678 visit_deref(bool *divergent
, nir_deref_instr
*deref
,
679 nir_divergence_options options
, gl_shader_stage stage
)
681 if (divergent
[deref
->dest
.ssa
.index
])
684 bool is_divergent
= false;
685 switch (deref
->deref_type
) {
686 case nir_deref_type_var
:
687 is_divergent
= !nir_variable_is_uniform(deref
->var
, options
, stage
);
689 case nir_deref_type_array
:
690 case nir_deref_type_ptr_as_array
:
691 is_divergent
= divergent
[deref
->arr
.index
.ssa
->index
];
693 case nir_deref_type_struct
:
694 case nir_deref_type_array_wildcard
:
695 is_divergent
|= divergent
[deref
->parent
.ssa
->index
];
697 case nir_deref_type_cast
:
698 is_divergent
= !nir_variable_mode_is_uniform(deref
->var
->data
.mode
) ||
699 divergent
[deref
->parent
.ssa
->index
];
703 divergent
[deref
->dest
.ssa
.index
] = is_divergent
;
708 visit_block(bool *divergent
, nir_block
*block
, nir_divergence_options options
,
709 gl_shader_stage stage
)
711 bool has_changed
= false;
713 nir_foreach_instr(instr
, block
) {
714 switch (instr
->type
) {
715 case nir_instr_type_alu
:
716 has_changed
|= visit_alu(divergent
, nir_instr_as_alu(instr
));
718 case nir_instr_type_intrinsic
:
719 has_changed
|= visit_intrinsic(divergent
, nir_instr_as_intrinsic(instr
),
722 case nir_instr_type_tex
:
723 has_changed
|= visit_tex(divergent
, nir_instr_as_tex(instr
));
725 case nir_instr_type_phi
:
726 has_changed
|= visit_phi(divergent
, nir_instr_as_phi(instr
));
728 case nir_instr_type_load_const
:
729 has_changed
|= visit_load_const(divergent
, nir_instr_as_load_const(instr
));
731 case nir_instr_type_ssa_undef
:
732 has_changed
|= visit_ssa_undef(divergent
, nir_instr_as_ssa_undef(instr
));
734 case nir_instr_type_deref
:
735 has_changed
|= visit_deref(divergent
, nir_instr_as_deref(instr
),
738 case nir_instr_type_jump
:
740 case nir_instr_type_call
:
741 case nir_instr_type_parallel_copy
:
742 unreachable("NIR divergence analysis: Unsupported instruction type.");
750 visit_if(bool *divergent
, nir_if
*if_stmt
, nir_divergence_options options
, gl_shader_stage stage
)
752 return visit_cf_list(divergent
, &if_stmt
->then_list
, options
, stage
) |
753 visit_cf_list(divergent
, &if_stmt
->else_list
, options
, stage
);
757 visit_loop(bool *divergent
, nir_loop
*loop
, nir_divergence_options options
, gl_shader_stage stage
)
759 bool has_changed
= false;
762 /* TODO: restructure this and the phi handling more efficiently */
764 repeat
= visit_cf_list(divergent
, &loop
->body
, options
, stage
);
765 has_changed
|= repeat
;
772 visit_cf_list(bool *divergent
, struct exec_list
*list
,
773 nir_divergence_options options
, gl_shader_stage stage
)
775 bool has_changed
= false;
777 foreach_list_typed(nir_cf_node
, node
, node
, list
) {
778 switch (node
->type
) {
779 case nir_cf_node_block
:
780 has_changed
|= visit_block(divergent
, nir_cf_node_as_block(node
),
784 has_changed
|= visit_if(divergent
, nir_cf_node_as_if(node
),
787 case nir_cf_node_loop
:
788 has_changed
|= visit_loop(divergent
, nir_cf_node_as_loop(node
),
791 case nir_cf_node_function
:
792 unreachable("NIR divergence analysis: Unsupported cf_node type.");
801 nir_divergence_analysis(nir_shader
*shader
, nir_divergence_options options
)
803 nir_function_impl
*impl
= nir_shader_get_entrypoint(shader
);
804 bool *t
= rzalloc_array(shader
, bool, impl
->ssa_alloc
);
806 visit_cf_list(t
, &impl
->body
, options
, shader
->info
.stage
);